DESeq2
After generating a gene by sample expression matrix, we need to create a data.frame with sample-level information which will be used to generate the groups to perform differential expresison on.
# create sample-level data.frame
sample.info <- data.frame(sample_id = XX, group = XX)
# convert group variable to a factor (NOTE: the first level will be the reference group)
sample.info$group <- as.factor(sample.info$group)
# set row names as sample_id
rownames(sample.info) <- as.character(sample.info$sample_id)
Some minor formatting…
# make sure all samples in the expression matrix are in the sample-level info data.frame
exp.mat <- exp.mat[ , which(names(exp.mat) %in% sample.info$sample_id)]
# order columns of expression matrix by order of sample IDs in sample-level info data.frame
exp.mat <- exp.mat[ , rownames(sample.info)]
# sanity check
all(rownames(sample.info) == colnames(exp.mat))
Although setup is the same, this is where DESeq2 starts to differ from edgeR…
dds <- DESeqDataSetFromMatrix(countData = exp.mat,
colData = sample.info,
design = ~ cohort)
featureData <- data.frame(gene=rownames(exp.mat))
mcols(dds) <- DataFrame(mcols(dds), featureData)
mcols(dds)
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]
dds$cohort <- relevel(dds$cohort, ref = "non_twt")
dds <- DESeq(dds)
res <- results(dds)
- Previous
- Next