まずはホームディレクトリを設定します。

home <- [配布したフォルダをコピーしたパス]
setwd(home)

分散分析について説明します。t検定は分散分析の特殊ケースで、t検定では二群の平均値を比較する問題でしたが、分散分析では、より一般的に二つ以上の群間で平均値を比較することができます。例えば、以下のように、3つの条件(例えば、薬剤の投与条件など)における各遺伝子の発現量を3サンプルずつ測定す、条件間で平均値の異なるような遺伝子群を同定したいときに、分散分析を用いることができます。

set.seed(100)
n <- 10
p <- 5
cond1 <- matrix(rnbinom(n*p,2,mu = 100),ncol=p,dimnames=list(paste0("Gene",1:n),paste0("Condition",rep(1,p))))
cond2 <- matrix(rnbinom(n*p,2,mu = 100),ncol=p,dimnames=list(paste0("Gene",1:n),paste0("Condition",rep(2,p))))
cond3 <- matrix(rnbinom(n*p,2,mu = 300),ncol=p,dimnames=list(paste0("Gene",1:n),paste0("Condition",rep(3,p))))
data_example <- cbind(cond1,cond2,cond3)
Condition1 Condition1 Condition1 Condition1 Condition1 Condition2 Condition2 Condition2 Condition2 Condition2 Condition3 Condition3 Condition3 Condition3 Condition3
Gene1 36 28 155 49 186 87 58 87 46 33 366 67 321 253 542
Gene2 140 78 66 47 149 163 246 54 96 135 392 50 863 450 437
Gene3 90 44 20 49 16 102 3 233 58 49 543 206 543 510 1015
Gene4 100 65 87 68 71 127 64 128 52 284 696 343 293 175 370
Gene5 81 104 58 192 25 345 62 189 39 45 696 361 137 151 282
Gene6 72 113 29 156 61 89 37 294 125 169 1162 102 547 137 204
Gene7 40 144 124 91 28 24 141 70 29 166 186 324 195 64 272
Gene8 172 263 76 36 85 37 96 228 7 195 394 186 1046 459 209
Gene9 25 47 5 100 92 86 75 46 329 121 540 47 561 67 288
Gene10 99 52 73 85 210 93 43 89 62 119 64 126 586 357 345

一つ目の遺伝子(Gene1)に対して

(groups <- colnames(data_example))
 [1] "Condition1" "Condition1" "Condition1" "Condition1" "Condition1" "Condition2" "Condition2" "Condition2" "Condition2" "Condition2" "Condition3" "Condition3"
[13] "Condition3" "Condition3" "Condition3"
(x <- data_example[1,])
Condition1 Condition1 Condition1 Condition1 Condition1 Condition2 Condition2 Condition2 Condition2 Condition2 Condition3 Condition3 Condition3 Condition3 Condition3 
        36         28        155         49        186         87         58         87         46         33        366         67        321        253        542 
(x2 <- data.frame(expr=x,groups=groups))
(lm_fit <- lm(expr~0 + factor(groups),x2))

Call:
lm(formula = expr ~ 0 + factor(groups), data = x2)

Coefficients:
factor(groups)Condition1  factor(groups)Condition2  factor(groups)Condition3  
                    90.8                      62.2                     309.8  
lm_fit$contrasts
$`factor(groups)`
[1] "contr.treatment"
(result <- anova(lm_fit))
Analysis of Variance Table

Response: expr
               Df Sum Sq Mean Sq F value    Pr(>F)    
factor(groups)  3 540448  180149  15.052 0.0002274 ***
Residuals      12 143620   11968                      
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
result$`Pr(>F)`[1]
[1] 0.0002274161

いま、Gene1に対して、

groups <- colnames(data_example)
pvals <- NULL
for(i in 1:n){
x <- data_example[i,]
x2 <- data.frame(expr=x,groups=groups)
lm_fit <- lm(expr~0 + factor(groups),x2)
result <- anova(lm_fit)
pvals[i] <- result$`Pr(>F)`[1]
}
(qvals <- p.adjust(pvals,method="BH"))
 [1] 0.0005685403 0.0011965310 0.0003635731 0.0003635731 0.0029166489 0.0170762568 0.0003635731 0.0032017675 0.0053416020 0.0015868761

もし、二群間の比較であれば、t-検定と同じ結果を与えます。

data_example2 <- cbind(cond1,cond3)
groups <- colnames(data_example2)
pvals_anova <- NULL
for(i in 1:n){
x <- data_example2[i,]
x2 <- data.frame(expr=x,groups=groups)
lm_fit <- lm(expr~factor(groups),x2)
result <- anova(lm_fit)
pvals_anova[i] <- result$`Pr(>F)`[1]
}
(qvals_anova <- p.adjust(pvals_anova,method="BH"))
 [1] 0.07687011 0.07687011 0.03995054 0.04594354 0.08388471 0.12430750 0.07687011 0.09120758 0.08388471 0.09269908
pvals_t <- NULL
for(i in 1:n){
  x <- data_example2[i,1:5]
  y <- data_example2[i,6:10]
  pvals_t[i] <- t.test(x,y,var.equal = TRUE)$p.value
}
(qvals_t <- p.adjust(pvals_t,method="BH"))
 [1] 0.07687011 0.07687011 0.03995054 0.04594354 0.08388471 0.12430750 0.07687011 0.09120758 0.08388471 0.09269908
round(pvals_anova,3)==round(pvals_t,3) 
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
round(qvals_anova,3)==round(qvals_t,3)
 [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

 つまりt検定は分散分析の特殊ケースです。また、分散分析では複数条件間の差異を一度に検定できるだけでなく、条件間の交互作用なども自在にモデリングできるため、実験デザインに合わせてモデル化することが可能である点も重要です。

LS0tDQp0aXRsZTogIpOdjHaTSYm8kOCMn5LogWmVqo5VlaqQzYFqIg0KYXV0aG9yOiAiWXVzdWtlIE1hdHN1aSINCmFmZmlsaWF0aW9uOiAilryMw4mukeWKd4NWg1iDZYOAkLaVqIp3laqW7CINCmRhdGU6ICKNxY9JjViQVpP6gUYyMDE3lE41jI4xN5P6Ig0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KgtyCuILNg3qBW4OAg2aDQoOMg06DZ4OKgvCQ3ZLogrWC3IK3gUINCmBgYHtyLGV2YWw9Rn0NCmhvbWUgPC0gW5R6lXqCtYK9g3SDSIOLg1+C8INSg3OBW4K1gr2DcINYXQ0Kc2V0d2QoaG9tZSkNCmBgYA0KDQoNCmBgYHtyLGVjaG89Rn0NCmxpYnJhcnkoa25pdHIpDQpgYGANCpWqjlWVqpDNgsmCwoKigsSQ4Ja+grWC3IK3gUJ0jJ+S6ILNlaqOVZWqkM2CzJPBjuqDUIFbg1iCxYFBdIyfkuiCxYLNk/GMUYLMlb2Lz5JsgvCU5IpygreC6ZbikeiCxYK1gr2CqoFBlaqOVZWqkM2CxYLNgUGC5oLoiOqUypNJgsmT8YLCiMiP44LMjFGK1ILFlb2Lz5JsgvCU5IpygreC6YKxgsaCqoLFgquC3IK3gUKX4YKmgs6BQYjIibqCzILmgqSCyYFBM4LCgsyP8IyPgWmX4YKmgs6BQZbyjdyCzJOKl16P8IyPgsiCx4FqgsmCqIKvgumKZYjik2COcYLMlK2Mu5fKgvCCUoNUg5ODdoOLgriCwpGqkuiCt4FBj/CMj4rUgsWVvYvPkmyCzIjZgsiC6YLmgqSCyIjik2COcYxRgvCTr5LogrWCvYKigsaCq4LJgUGVqo5VlaqQzYLwl3CCooLpgrGCxoKqgsWCq4LcgreBQg0KYGBge3J9DQpzZXQuc2VlZCgxMDApDQpuIDwtIDEwDQpwIDwtIDUNCmNvbmQxIDwtIG1hdHJpeChybmJpbm9tKG4qcCwyLG11ID0gMTAwKSxuY29sPXAsZGltbmFtZXM9bGlzdChwYXN0ZTAoIkdlbmUiLDE6bikscGFzdGUwKCJDb25kaXRpb24iLHJlcCgxLHApKSkpDQpjb25kMiA8LSBtYXRyaXgocm5iaW5vbShuKnAsMixtdSA9IDEwMCksbmNvbD1wLGRpbW5hbWVzPWxpc3QocGFzdGUwKCJHZW5lIiwxOm4pLHBhc3RlMCgiQ29uZGl0aW9uIixyZXAoMixwKSkpKQ0KY29uZDMgPC0gbWF0cml4KHJuYmlub20obipwLDIsbXUgPSAzMDApLG5jb2w9cCxkaW1uYW1lcz1saXN0KHBhc3RlMCgiR2VuZSIsMTpuKSxwYXN0ZTAoIkNvbmRpdGlvbiIscmVwKDMscCkpKSkNCmRhdGFfZXhhbXBsZSA8LSBjYmluZChjb25kMSxjb25kMixjb25kMykNCmBgYA0KDQpgYGB7cixlY2hvPUZ9DQprYWJsZShkYXRhX2V4YW1wbGUpDQpgYGANCg0KDQoNCojqgsKW2oLMiOKTYI5xKEdlbmUxKYLJkc6CtYLEDQpgYGB7cn0NCihncm91cHMgPC0gY29sbmFtZXMoZGF0YV9leGFtcGxlKSkNCih4IDwtIGRhdGFfZXhhbXBsZVsxLF0pDQooeDIgPC0gZGF0YS5mcmFtZShleHByPXgsZ3JvdXBzPWdyb3VwcykpDQpgYGANCmBgYHtyfQ0KKGxtX2ZpdCA8LSBsbShleHByfjAgKyBmYWN0b3IoZ3JvdXBzKSx4MikpDQpsbV9maXQkY29udHJhc3RzDQoocmVzdWx0IDwtIGFub3ZhKGxtX2ZpdCkpDQpyZXN1bHQkYFByKD5GKWBbMV0NCmBgYA0KgqKC3IFBR2VuZTGCyZHOgrWCxIFBDQpgYGB7cn0NCmdyb3VwcyA8LSBjb2xuYW1lcyhkYXRhX2V4YW1wbGUpDQpwdmFscyA8LSBOVUxMDQpmb3IoaSBpbiAxOm4pew0KeCA8LSBkYXRhX2V4YW1wbGVbaSxdDQp4MiA8LSBkYXRhLmZyYW1lKGV4cHI9eCxncm91cHM9Z3JvdXBzKQ0KbG1fZml0IDwtIGxtKGV4cHJ+MCArIGZhY3Rvcihncm91cHMpLHgyKQ0KcmVzdWx0IDwtIGFub3ZhKGxtX2ZpdCkNCnB2YWxzW2ldIDwtIHJlc3VsdCRgUHIoPkYpYFsxXQ0KfQ0KKHF2YWxzIDwtIHAuYWRqdXN0KHB2YWxzLG1ldGhvZD0iQkgiKSkNCmBgYA0KDQqC4IK1gUGT8YxRitSCzJTkinKCxYKgguqCzoFBdC2Mn5LogsaTr4K2jIuJyoLwl16CpoLcgreBQg0KDQpgYGB7cn0NCmRhdGFfZXhhbXBsZTIgPC0gY2JpbmQoY29uZDEsY29uZDMpDQpncm91cHMgPC0gY29sbmFtZXMoZGF0YV9leGFtcGxlMikNCnB2YWxzX2Fub3ZhIDwtIE5VTEwNCmZvcihpIGluIDE6bil7DQp4IDwtIGRhdGFfZXhhbXBsZTJbaSxdDQp4MiA8LSBkYXRhLmZyYW1lKGV4cHI9eCxncm91cHM9Z3JvdXBzKQ0KbG1fZml0IDwtIGxtKGV4cHJ+ZmFjdG9yKGdyb3VwcykseDIpDQpyZXN1bHQgPC0gYW5vdmEobG1fZml0KQ0KcHZhbHNfYW5vdmFbaV0gPC0gcmVzdWx0JGBQcig+RilgWzFdDQp9DQoocXZhbHNfYW5vdmEgPC0gcC5hZGp1c3QocHZhbHNfYW5vdmEsbWV0aG9kPSJCSCIpKQ0KYGBgDQoNCmBgYHtyfQ0KcHZhbHNfdCA8LSBOVUxMDQpmb3IoaSBpbiAxOm4pew0KICB4IDwtIGRhdGFfZXhhbXBsZTJbaSwxOjVdDQogIHkgPC0gZGF0YV9leGFtcGxlMltpLDY6MTBdDQogIHB2YWxzX3RbaV0gPC0gdC50ZXN0KHgseSx2YXIuZXF1YWwgPSBUUlVFKSRwLnZhbHVlDQp9DQoocXZhbHNfdCA8LSBwLmFkanVzdChwdmFsc190LG1ldGhvZD0iQkgiKSkNCmBgYA0KDQpgYGB7cn0NCnJvdW5kKHB2YWxzX2Fub3ZhLDMpPT1yb3VuZChwdmFsc190LDMpIA0KYGBgDQoNCmBgYHtyfQ0Kcm91bmQocXZhbHNfYW5vdmEsMyk9PXJvdW5kKHF2YWxzX3QsMykNCmBgYA0KgUCCwoLcguh0jJ+S6ILNlaqOVZWqkM2CzJPBjuqDUIFbg1iCxYK3gUKC3IK9gUGVqo5VlaqQzYLFgs2VoZCUj/CMj4rUgsyNt4jZgvCI6pN4gsmMn5LogsWCq4Lpgr6Cr4LFgsiCrYFBj/CMj4rUgsyM8IzdjeyXcILIgseC4I6pjd2CyYOCg2aDioOTg0+CxYKrgumCvYLfgUGOwIyxg2aDVYNDg5OCyY2Hgu2CuYLEg4KDZoOLibuCt4LpgrGCxoKqicKUXILFgqCC6ZNfguCPZJd2gsWCt4FC