Documentations edgeR can be found here.
Read file with read counts.
library(edgeR)
library(locfit)
Counts<-read.table(file="Dsim_count_table.tsv", header=T, row.names=1)
Number of genes:
dim(Counts)
## [1] 12990 12
Table:
head(Counts)
## C15_r1 C15_r2 C15_r3 C23_r1 C23_r2 C23_r3 W15_r1 W15_r2 W15_r3
## FBgn0000008 41 66 52 49 64 39 46 39 60
## FBgn0000014 7 7 15 12 9 5 9 5 19
## FBgn0000015 4 5 1 4 4 3 6 0 2
## FBgn0000017 150 218 203 360 492 267 204 195 200
## FBgn0000018 0 5 2 2 7 2 1 0 4
## FBgn0000022 0 0 0 0 0 0 0 0 0
## W23_r1 W23_r2 W23_r3
## FBgn0000008 118 386 184
## FBgn0000014 32 95 13
## FBgn0000015 5 89 13
## FBgn0000017 430 1136 833
## FBgn0000018 5 62 33
## FBgn0000022 0 0 0
Keep those genes that were expressed in at a reasonable level (25 pairs) in all samples.
KeptCounts=Counts[rowSums(Counts>=25)==12, ]
Number of kept genes:
dim(KeptCounts)
## [1] 7402 12
“Treatment” groups.
Group_PW=factor(c("C23","C23","C23","W23","W23","W23"))
Group_PW
## [1] C23 C23 C23 W23 W23 W23
## Levels: C23 W23
See the description of factor
type here.
Differential expression list. Comparing Cool 23 to Warm 23.
DE_list_PW=DGEList(KeptCounts[,c(4:6, 10:12)], group=Group_PW)
TMM (Trimmed Mean of M-values) normalization of read counts.
DE_list_PW=calcNormFactors(DE_list_PW, method=c("TMM"))
Estimating dispersion.
DE_list_PW=estimateDisp(DE_list_PW)
## Design matrix not provided. Switch to the classic mode.
DE_list_PW_DE=exactTest(DE_list_PW, pair=c("C23","W23"))
DE_list_PW_DE
## An object of class "DGEExact"
## $table
## logFC logCPM PValue
## FBgn0000008 1.6686491 2.721447 0.0002964316
## FBgn0000017 0.6252065 4.838898 0.1024458267
## FBgn0000024 1.2227699 2.414934 0.0096324462
## FBgn0000032 -0.4583214 6.090728 0.1839443531
## FBgn0000038 1.9157808 2.978131 0.0001130885
## 7397 more rows ...
##
## $comparison
## [1] "C23" "W23"
##
## $genes
## NULL
Write result table to a tsv file.
write.table(DE_list_PW_DE$table, "C23vsW23_results.tsv", sep="\t")
See if a gene was DE significantly - after Benjamini-Hochberg p-value correction.
PairWise_DE_up.down=decideTestsDGE(DE_list_PW_DE, p=0.05, adjust="BH")
rownames(PairWise_DE_up.down)=rownames(KeptCounts)
head(PairWise_DE_up.down)
## [,1]
## FBgn0000008 1
## FBgn0000017 0
## FBgn0000024 1
## FBgn0000032 0
## FBgn0000038 1
## FBgn0000039 1
Number of DE genes:
summary(PairWise_DE_up.down)
## [,1]
## -1 1862
## 0 3643
## 1 1897
Significantly Up and Down regulated genes:
PariWise_DE_up_genes=names(PairWise_DE_up.down[PairWise_DE_up.down[,1]==1,])
PariWise_DE_down_genes=names(PairWise_DE_up.down[PairWise_DE_up.down[,1]==-1,])
Write DE genes and all genes (background) to files.
write(PariWise_DE_up_genes, "PariWise_C23_W23_DE_up_genes")
write(PariWise_DE_down_genes, "PariWise_C23_W23_DE_down_genes")
write(rownames(KeptCounts), "Background_genes")
“Treatment” groups.
Group_GLM=factor(c("C15", "C15", "C15", "C23", "C23", "C23", "W15", "W15", "W15", "W23", "W23", "W23"))
Gene expression lists.
DE_list_GLM=DGEList(KeptCounts, group=Group_GLM)
Design groups.
A_C.W=factor(c("C","C","C","C","C","C", "W","W","W","W","W","W")) # Adaptation to Cold and Warm environments
D_15.23=factor(c("15","15","15", "23","23","23", "15","15","15", "23","23","23")) # Developmental temperature: 15°C and 23°C
Design_AmD=model.matrix(~A_C.W*D_15.23) # Multiplicative model
Design_AmD
## (Intercept) A_C.WW D_15.2323 A_C.WW:D_15.2323
## 1 1 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 1 0
## 5 1 0 1 0
## 6 1 0 1 0
## 7 1 1 0 0
## 8 1 1 0 0
## 9 1 1 0 0
## 10 1 1 1 1
## 11 1 1 1 1
## 12 1 1 1 1
## attr(,"assign")
## [1] 0 1 2 3
## attr(,"contrasts")
## attr(,"contrasts")$A_C.W
## [1] "contr.treatment"
##
## attr(,"contrasts")$D_15.23
## [1] "contr.treatment"
GLM dispersions.
DE_list_GLM=estimateDisp(DE_list_GLM, design=Design_AmD)
Fitting GLM model and likelihood ratio tests (LRT).
DE_list_GLM_fit=glmFit(DE_list_GLM, design=Design_AmD)
DE_list_GLM_fit_vA=glmLRT(DE_list_GLM_fit, coef=2) # Adaptation
DE_list_GLM_fit_vD=glmLRT(DE_list_GLM_fit, coef=3) # Developmental temperature
DE_list_GLM_fit_VA.D=glmLRT(DE_list_GLM_fit, coef=4) # Interaction of Adaptation and Developmental temperature
DE genes - GLM.
# Adaptation
DE_list_GLM_fit_vA_up.down=as.matrix(decideTestsDGE(DE_list_GLM_fit_vA, p=0.05, adjust="BH"))
rownames(DE_list_GLM_fit_vA_up.down)=rownames(KeptCounts)
# Developmental temperature
DE_list_GLM_fit_vD_up.down=as.matrix(decideTestsDGE(DE_list_GLM_fit_vD, p=0.05, adjust="BH"))
rownames(DE_list_GLM_fit_vD_up.down)=rownames(KeptCounts)
# Interaction of Adaptation and Developmental temperature
DE_list_GLM_fit_vA.D_up.down=as.matrix(decideTestsDGE(DE_list_GLM_fit_VA.D, p=0.05, adjust="BH"))
rownames(DE_list_GLM_fit_vA.D_up.down)=rownames(KeptCounts)
Number of DE genes - GLM.
summary(DE_list_GLM_fit_vA_up.down) # Adaptation
## [,1]
## -1 0
## 0 7402
## 1 0
summary(DE_list_GLM_fit_vD_up.down) # Developmental temperature
## [,1]
## -1 966
## 0 4708
## 1 1728
summary(DE_list_GLM_fit_vA.D_up.down) # Interaction of Adaptation and Developmental temperature
## [,1]
## -1 994
## 0 4421
## 1 1987
Significantly Up and Down regulated genes.
GLM_vD_DE_up_genes=names(DE_list_GLM_fit_vD_up.down[DE_list_GLM_fit_vD_up.down[,1]==1,]) # Developmental temperature Up
GLM_vD_DE_down_genes=names(DE_list_GLM_fit_vD_up.down[DE_list_GLM_fit_vD_up.down[,1]==-1,]) # Developmental temperature Down
GLM_vA.D_DE_up_genes=names(DE_list_GLM_fit_vA.D_up.down[DE_list_GLM_fit_vA.D_up.down[,1]==1,]) # Interaction Up
GLM_vA.D_DE_down_genes=names(DE_list_GLM_fit_vA.D_up.down[DE_list_GLM_fit_vA.D_up.down[,1]==-1,]) # Interaction Down
Write DE genes to files.
write(GLM_vD_DE_up_genes, "GLM_DevTemp_DE_up_genes")
write(GLM_vD_DE_down_genes, "GLM_DevTemp_DE_down_genes")
write(GLM_vA.D_DE_up_genes, "GLM_Interaction_DE_up_genes")
write(GLM_vA.D_DE_down_genes, "GLM_Interaction_DE_down_genes")
edgeR
DE result table, you can find here (C23vsW23_result.tab; or use your own at the C23 - W23 comparison).Calculate the Log2 Fold Change of gene expression on the original counts (non-normalized and not pseudo.counts) between C15 and C23. Do not use edgeR, but write your own script. The logFC should be negative (down-regulated) if the gene expression at 15 ° C was greater than 23 ° C!