Documentations edgeR can be found here.

Preprocessing

About data

Load data

Read file with read counts.

library(edgeR)
library(locfit)
Counts<-read.table(file="Dsim_count_table.tsv", header=T, row.names=1)

Number of genes:

dim(Counts)
## [1] 12990    12

Table:

head(Counts)
##             C15_r1 C15_r2 C15_r3 C23_r1 C23_r2 C23_r3 W15_r1 W15_r2 W15_r3
## FBgn0000008     41     66     52     49     64     39     46     39     60
## FBgn0000014      7      7     15     12      9      5      9      5     19
## FBgn0000015      4      5      1      4      4      3      6      0      2
## FBgn0000017    150    218    203    360    492    267    204    195    200
## FBgn0000018      0      5      2      2      7      2      1      0      4
## FBgn0000022      0      0      0      0      0      0      0      0      0
##             W23_r1 W23_r2 W23_r3
## FBgn0000008    118    386    184
## FBgn0000014     32     95     13
## FBgn0000015      5     89     13
## FBgn0000017    430   1136    833
## FBgn0000018      5     62     33
## FBgn0000022      0      0      0

Filtering

Keep those genes that were expressed in at a reasonable level (25 pairs) in all samples.

KeptCounts=Counts[rowSums(Counts>=25)==12, ]

Number of kept genes:

dim(KeptCounts)
## [1] 7402   12

Pair-wise (PW) differential expression (DE) analysis of expressions measured at 23°C

“Treatment” groups.

Group_PW=factor(c("C23","C23","C23","W23","W23","W23"))
Group_PW
## [1] C23 C23 C23 W23 W23 W23
## Levels: C23 W23

See the description of factor type here.

Differential expression list. Comparing Cool 23 to Warm 23.

DE_list_PW=DGEList(KeptCounts[,c(4:6, 10:12)], group=Group_PW)

TMM (Trimmed Mean of M-values) normalization of read counts.

DE_list_PW=calcNormFactors(DE_list_PW, method=c("TMM"))

Estimating dispersion.

DE_list_PW=estimateDisp(DE_list_PW)
## Design matrix not provided. Switch to the classic mode.

Differential expression testing

DE_list_PW_DE=exactTest(DE_list_PW, pair=c("C23","W23"))
DE_list_PW_DE
## An object of class "DGEExact"
## $table
##                  logFC   logCPM       PValue
## FBgn0000008  1.6686491 2.721447 0.0002964316
## FBgn0000017  0.6252065 4.838898 0.1024458267
## FBgn0000024  1.2227699 2.414934 0.0096324462
## FBgn0000032 -0.4583214 6.090728 0.1839443531
## FBgn0000038  1.9157808 2.978131 0.0001130885
## 7397 more rows ...
## 
## $comparison
## [1] "C23" "W23"
## 
## $genes
## NULL

Write result table to a tsv file.

write.table(DE_list_PW_DE$table, "C23vsW23_results.tsv", sep="\t")

See if a gene was DE significantly - after Benjamini-Hochberg p-value correction.

PairWise_DE_up.down=decideTestsDGE(DE_list_PW_DE, p=0.05, adjust="BH")
rownames(PairWise_DE_up.down)=rownames(KeptCounts)
head(PairWise_DE_up.down)
##             [,1]
## FBgn0000008    1
## FBgn0000017    0
## FBgn0000024    1
## FBgn0000032    0
## FBgn0000038    1
## FBgn0000039    1

Number of DE genes:

summary(PairWise_DE_up.down)
##    [,1]
## -1 1862
## 0  3643
## 1  1897

Significantly Up and Down regulated genes:

PariWise_DE_up_genes=names(PairWise_DE_up.down[PairWise_DE_up.down[,1]==1,])
PariWise_DE_down_genes=names(PairWise_DE_up.down[PairWise_DE_up.down[,1]==-1,])

Write DE genes and all genes (background) to files.

write(PariWise_DE_up_genes, "PariWise_C23_W23_DE_up_genes")
write(PariWise_DE_down_genes, "PariWise_C23_W23_DE_down_genes")
write(rownames(KeptCounts), "Background_genes")

DE analysis with GLM – Generalized Linear Model – of all groups

“Treatment” groups.

Group_GLM=factor(c("C15", "C15", "C15", "C23", "C23", "C23", "W15", "W15", "W15", "W23", "W23", "W23"))

Gene expression lists.

DE_list_GLM=DGEList(KeptCounts, group=Group_GLM)

Design groups.

A_C.W=factor(c("C","C","C","C","C","C", "W","W","W","W","W","W")) # Adaptation to Cold and Warm environments
D_15.23=factor(c("15","15","15", "23","23","23", "15","15","15", "23","23","23")) # Developmental temperature: 15°C and 23°C
Design_AmD=model.matrix(~A_C.W*D_15.23) # Multiplicative model
Design_AmD
##    (Intercept) A_C.WW D_15.2323 A_C.WW:D_15.2323
## 1            1      0         0                0
## 2            1      0         0                0
## 3            1      0         0                0
## 4            1      0         1                0
## 5            1      0         1                0
## 6            1      0         1                0
## 7            1      1         0                0
## 8            1      1         0                0
## 9            1      1         0                0
## 10           1      1         1                1
## 11           1      1         1                1
## 12           1      1         1                1
## attr(,"assign")
## [1] 0 1 2 3
## attr(,"contrasts")
## attr(,"contrasts")$A_C.W
## [1] "contr.treatment"
## 
## attr(,"contrasts")$D_15.23
## [1] "contr.treatment"

GLM dispersions.

DE_list_GLM=estimateDisp(DE_list_GLM, design=Design_AmD)

Fitting GLM model and likelihood ratio tests (LRT).

DE_list_GLM_fit=glmFit(DE_list_GLM, design=Design_AmD)
DE_list_GLM_fit_vA=glmLRT(DE_list_GLM_fit, coef=2) # Adaptation
DE_list_GLM_fit_vD=glmLRT(DE_list_GLM_fit, coef=3) # Developmental temperature
DE_list_GLM_fit_VA.D=glmLRT(DE_list_GLM_fit, coef=4) # Interaction of Adaptation and Developmental temperature

DE genes - GLM.

# Adaptation
DE_list_GLM_fit_vA_up.down=as.matrix(decideTestsDGE(DE_list_GLM_fit_vA, p=0.05, adjust="BH"))
rownames(DE_list_GLM_fit_vA_up.down)=rownames(KeptCounts)

# Developmental temperature
DE_list_GLM_fit_vD_up.down=as.matrix(decideTestsDGE(DE_list_GLM_fit_vD, p=0.05, adjust="BH"))
rownames(DE_list_GLM_fit_vD_up.down)=rownames(KeptCounts)

# Interaction of Adaptation and Developmental temperature
DE_list_GLM_fit_vA.D_up.down=as.matrix(decideTestsDGE(DE_list_GLM_fit_VA.D, p=0.05, adjust="BH"))
rownames(DE_list_GLM_fit_vA.D_up.down)=rownames(KeptCounts)

Number of DE genes - GLM.

summary(DE_list_GLM_fit_vA_up.down) # Adaptation
##    [,1]
## -1    0
## 0  7402
## 1     0
summary(DE_list_GLM_fit_vD_up.down) # Developmental temperature
##    [,1]
## -1  966
## 0  4708
## 1  1728
summary(DE_list_GLM_fit_vA.D_up.down) # Interaction of Adaptation and Developmental temperature
##    [,1]
## -1  994
## 0  4421
## 1  1987

Significantly Up and Down regulated genes.

GLM_vD_DE_up_genes=names(DE_list_GLM_fit_vD_up.down[DE_list_GLM_fit_vD_up.down[,1]==1,]) # Developmental temperature Up
GLM_vD_DE_down_genes=names(DE_list_GLM_fit_vD_up.down[DE_list_GLM_fit_vD_up.down[,1]==-1,]) # Developmental temperature Down
GLM_vA.D_DE_up_genes=names(DE_list_GLM_fit_vA.D_up.down[DE_list_GLM_fit_vA.D_up.down[,1]==1,]) # Interaction Up
GLM_vA.D_DE_down_genes=names(DE_list_GLM_fit_vA.D_up.down[DE_list_GLM_fit_vA.D_up.down[,1]==-1,]) # Interaction Down

Write DE genes to files.

write(GLM_vD_DE_up_genes, "GLM_DevTemp_DE_up_genes")
write(GLM_vD_DE_down_genes, "GLM_DevTemp_DE_down_genes")
write(GLM_vA.D_DE_up_genes, "GLM_Interaction_DE_up_genes")
write(GLM_vA.D_DE_down_genes, "GLM_Interaction_DE_down_genes")

Exercise

Task 1

  1. Read the edgeR DE result table, you can find here (C23vsW23_result.tab; or use your own at the C23 - W23 comparison).
  2. Add a column of plots with the Benjamini-Hochberg corrected p-values. Help: Use this p.adjust command to calculate this.
  3. Create a new variable that contains lines of data.frame where there are significantly up-regulated genes beside the corrected p-value. (up-regulated = W23 expressed more strongly than in C23, i.e. logFC positive) How many such genes are there?
  4. Do the same with down-regulated genes.

Task 2

Calculate the Log2 Fold Change of gene expression on the original counts (non-normalized and not pseudo.counts) between C15 and C23. Do not use edgeR, but write your own script. The logFC should be negative (down-regulated) if the gene expression at 15 ° C was greater than 23 ° C!