###################################################
### chunk number 1: Get the working directory
###################################################
#line 20 "R_Bioconductor_Hot_Topics.Rnw"
getwd()


###################################################
### chunk number 2: Go to the Affymetrix directory and list files
###################################################
#line 40 "R_Bioconductor_Hot_Topics.Rnw"
setwd("Affymetrix")
dir()


###################################################
### chunk number 3: Load Bioconductor libraries we'll need
###################################################
#line 50 "R_Bioconductor_Hot_Topics.Rnw"
library(affy)
library(limma)


###################################################
### chunk number 4: Read all Affymetrix CEL files in the current directory
###################################################
#line 61 "R_Bioconductor_Hot_Topics.Rnw"
CELs = ReadAffy()
CELs
setwd("../")


###################################################
### chunk number 5: Preprocess the set of arrays with RMA
###################################################
#line 69 "R_Bioconductor_Hot_Topics.Rnw"
eset = rma(CELs)


###################################################
### chunk number 6: Print a matrix of all RMA log2 values
###################################################
#line 77 "R_Bioconductor_Hot_Topics.Rnw"
eset.values = round(exprs(eset),4)
write.table(cbind(rownames(eset.values), eset.values), file="Affy_log2_rma_values.txt", 
	sep="\t", row.names=F, quote=F)


###################################################
### chunk number 7: Make and print Absent/Present calls for the set of arrays
###################################################
#line 85 "R_Bioconductor_Hot_Topics.Rnw"
mas5calls = mas5calls(CELs)
mas5calls.calls = exprs(mas5calls)
write.table(cbind(rownames(mas5calls.calls), mas5calls.calls), file="Affy_AP_calls.txt", 
	sep="\t", row.names=F, quote=F)


###################################################
### chunk number 8: Merge A/P calls and filter all "A" probesets
###################################################
#line 94 "R_Bioconductor_Hot_Topics.Rnw"
mas5calls.merged = apply(mas5calls.calls, 1, paste, collapse="")
eset.filtered = eset[mas5calls.merged!="AAAAAAAAAAAAAAAA",]
nrow(eset)
nrow(eset.filtered)


###################################################
### chunk number 9: Describe Affymetrix experimental design
###################################################
#line 107 "R_Bioconductor_Hot_Topics.Rnw"
targets = read.delim("Affymetrix/Affy_targets.txt")
design = model.matrix(~0+targets$Target)
colnames(design) = sort(unique(targets$Target))
rownames(design) = targets$FileName
head(design)


###################################################
### chunk number 10: Make Affymetrix contrast matrix showing desired sample comparisons
###################################################
#line 117 "R_Bioconductor_Hot_Topics.Rnw"
contrast.matrix = makeContrasts(
Old_trained - Old_sedentary,
Young_trained - Young_sedentary,
TrainedVsSedentary = ((Old_trained - Old_sedentary) + (Young_trained - Young_sedentary))/2,
levels=design)
contrast.matrix


###################################################
### chunk number 11: Run limma differential expression statistics on Affymetrix arrays
###################################################
#line 132 "R_Bioconductor_Hot_Topics.Rnw"
fit = lmFit(eset, design)
fit2 = contrasts.fit(fit, contrast.matrix)
fit2.ebayes = eBayes(fit2)


###################################################
### chunk number 12: List top differentially expressed probesets for each comparison
###################################################
#line 140 "R_Bioconductor_Hot_Topics.Rnw"
topTable(fit2.ebayes, coef=1, adjust="fdr")
topTable(fit2.ebayes, coef=2, adjust="fdr")
topTable(fit2.ebayes, coef=3, adjust="fdr")
write.fit(fit2.ebayes, file="Affy_limma_fdr.txt", digits=8, adjust="fdr")


###################################################
### chunk number 13: Get differentially expressed probesets using our definition
###################################################
#line 165 "R_Bioconductor_Hot_Topics.Rnw"
comparison = 3
DE = fit2.ebayes$p.value[,comparison] < 1e-3
sum(DE)


###################################################
### chunk number 14: Draw Affymetrix MA plot
###################################################
#line 175 "R_Bioconductor_Hot_Topics.Rnw"
plot(fit2.ebayes$Amean, fit2.ebayes$coeff[,comparison], main="MA plot", 
	ylab="log2 ( trained / sedentary )", xlab="mean expression level", 
	pch=20, cex=0.5)
points(fit2.ebayes$Amean[DE], fit2.ebayes$coeff[DE,comparison], col="blue", pch=20, cex=1)


###################################################
### chunk number 15: Draw Affymetrix volcano plot of raw p-values
###################################################
#line 186 "R_Bioconductor_Hot_Topics.Rnw"
plot(fit2.ebayes$coeff[,comparison], -log10(fit2.ebayes$p.value[,comparison]), main="Volcano plot", 
	xlab="log2 ( trained / sedentary )", ylab="log10 ( p-value )", pch=20, cex=0.5)
points(fit2.ebayes$coeff[DE,comparison], -log10(fit2.ebayes$p.value[DE,comparison]), 
	col="blue", pch=20, cex=1)


###################################################
### chunk number 16: Draw Affymetrix volcano plot of FDR p-values
###################################################
#line 197 "R_Bioconductor_Hot_Topics.Rnw"
FDR = p.adjust(fit2.ebayes$p.value[,comparison], method="fdr")
plot(fit2.ebayes$coeff[,comparison], -log10(FDR), main="Volcano plot", 
	xlab="log2 ( trained / sedentary )", ylab="log10 ( FDR )", pch=20, cex=0.5)
points(fit2.ebayes$coeff[DE,comparison], -log10(FDR[DE]), 
	col="blue", pch=20, cex=1)


###################################################
### chunk number 17: Get Affymetrix U133 Plus 2.0 probeset annotations
###################################################
#line 210 "R_Bioconductor_Hot_Topics.Rnw"
library(hgu133plus2.db)
library(annotate)
symbols = getSYMBOL(rownames(eset.values), "hgu133plus2")
write.table(cbind(rownames(eset.values), symbols), file="hgu133plus2.symbols.txt", 
	sep="\t", row.names=F, quote=F)


###################################################
### chunk number 18: Go to the Agilent directory and list files
###################################################
#line 225 "R_Bioconductor_Hot_Topics.Rnw"
setwd("Agilent")
dir()


###################################################
### chunk number 19: Load the Bioconductor library we'll need
###################################################
#line 234 "R_Bioconductor_Hot_Topics.Rnw"
library(limma)


###################################################
### chunk number 20: Read all the Agilent txt files in the directory
###################################################
#line 240 "R_Bioconductor_Hot_Topics.Rnw"
agilentFiles = dir(pattern = "GSM.*.txt$")
RG = read.maimages(agilentFiles, source="agilent")
setwd("../")


###################################################
### chunk number 21: Normalize the set of Agilent arrays
###################################################
#line 250 "R_Bioconductor_Hot_Topics.Rnw"
RG.nobg.0 = backgroundCorrect(RG, method="none", offset=0)
MA.loess.0 = normalizeWithinArrays(RG.nobg.0, method="loess")
MA.loess.q.0 = normalizeBetweenArrays(MA.loess.0, method="Aquantile")


###################################################
### chunk number 22: Access data from a RGList and a MAList
###################################################
#line 258 "R_Bioconductor_Hot_Topics.Rnw"
names(RG.nobg.0)
RG.nobg.0$R[1:5,1:5]
names(MA.loess.0)
MA.loess.0$M[1:5,1:5]


###################################################
### chunk number 23: Print all Agilent log2 ratios
###################################################
#line 267 "R_Bioconductor_Hot_Topics.Rnw"
log2.ratios = round(MA.loess.q.0$M, 4)
probe.info = MA.loess.q.0$genes[,c("ProbeName","GeneName")]
write.table(cbind(probe.info, log2.ratios), file="Agilent_log2_ratios.txt", 
  sep="\t", row.names=F, quote=F)


###################################################
### chunk number 24: Describe Agilent experimental design
###################################################
#line 278 "R_Bioconductor_Hot_Topics.Rnw"
targets = read.delim("Agilent/Agilent_targets.txt")
design = modelMatrix(targets, ref="UHR")
rownames(design) = agilentFiles
head(design)


###################################################
### chunk number 25: Make Agilent contrast matrix showing desired sample comparisons
###################################################
#line 289 "R_Bioconductor_Hot_Topics.Rnw"
contrast.matrix = makeContrasts(
Colon = crohns_descending_colon - healthy_descending_colon,
Ileum = crohns_terminal_ileum - healthy_terminal_ileum,
CrohnsVsHealthy = ((crohns_descending_colon - healthy_descending_colon) + 
	(crohns_terminal_ileum - healthy_terminal_ileum))/2,
levels=design)
contrast.matrix


###################################################
### chunk number 26: Run limma differential expression statistics on Agilent arrays
###################################################
#line 308 "R_Bioconductor_Hot_Topics.Rnw"
fit = lmFit(MA.loess.q.0, design)
fit2 = contrasts.fit(fit, contrast.matrix)
fit2.ebayes = eBayes(fit2)


###################################################
### chunk number 27: List top differentially expressed probes for each comparison
###################################################
#line 316 "R_Bioconductor_Hot_Topics.Rnw"
fit2.ebayes$genes = fit2.ebayes$genes[,c("ProbeName","GeneName")]
topTable(fit2.ebayes, coef=1, adjust="fdr")
topTable(fit2.ebayes, coef=2, adjust="fdr")
topTable(fit2.ebayes, coef=3, adjust="fdr")
write.fit(fit2.ebayes, file="Agilent_limma_fdr.txt", digits=8, adjust="fdr")


###################################################
### chunk number 28: Get differentially expressed probes using our definition
###################################################
#line 345 "R_Bioconductor_Hot_Topics.Rnw"
comparison = 3
DE = fit2.ebayes$p.value[,comparison] < 1e-3
sum(DE)


###################################################
### chunk number 29: Draw Agilent MA plot
###################################################
#line 355 "R_Bioconductor_Hot_Topics.Rnw"
plot(fit2.ebayes$Amean, fit2.ebayes$coeff[,comparison], main="MA plot", 
	ylab="log2 ( Crohn's / healthy )", xlab="mean expression level", 
	pch=20, cex=0.5)
points(fit2.ebayes$Amean[DE], fit2.ebayes$coeff[DE,comparison], col="blue", pch=20, cex=1)


###################################################
### chunk number 30: Draw Agilent volcano plot of raw p-values
###################################################
#line 366 "R_Bioconductor_Hot_Topics.Rnw"
plot(fit2.ebayes$coeff[,comparison], -log10(fit2.ebayes$p.value[,comparison]), main="Volcano plot", 
	xlab="log2 ( Crohn's / healthy )", ylab="log10 ( p-value )", pch=20, cex=0.5)
points(fit2.ebayes$coeff[DE,comparison], -log10(fit2.ebayes$p.value[DE,comparison]), 
	col="blue", pch=20, cex=1)


###################################################
### chunk number 31: Draw Agilent volcano plot of FDR p-values
###################################################
#line 377 "R_Bioconductor_Hot_Topics.Rnw"
FDR = p.adjust(fit2.ebayes$p.value[,comparison], method="fdr")
plot(fit2.ebayes$coeff[,comparison], -log10(FDR), main="Volcano plot", 
	xlab="log2 ( trained / sedentary )", ylab="log10 ( FDR )", pch=20, cex=0.5)
points(fit2.ebayes$coeff[DE,comparison], -log10(FDR[DE]), 
	col="blue", pch=20, cex=1)


###################################################
### chunk number 32: Go to the RNASeq directory and list files
###################################################
#line 394 "R_Bioconductor_Hot_Topics.Rnw"
setwd("RNASeq")
dir()


###################################################
### chunk number 33: Read matrix of raw counts
###################################################
#line 416 "R_Bioconductor_Hot_Topics.Rnw"
counts = read.delim("Brain_UHR_duplicates_counts.txt", row.names=1)
setwd("../")
nrow(counts)
head(counts)


###################################################
### chunk number 34: List group for each column of counts matrix
###################################################
#line 425 "R_Bioconductor_Hot_Topics.Rnw"
groups = c(rep("brain",2), rep("UHR", 2))
groups


###################################################
### chunk number 35: Remove genes with no counts and add pseudocount
###################################################
#line 432 "R_Bioconductor_Hot_Topics.Rnw"
sum.by.gene = apply(counts, 1, sum)
counts.no0genes = counts[sum.by.gene > 0,]
counts.no0genes.offset.1 = counts.no0genes + 1
nrow(counts.no0genes.offset.1)
head(counts.no0genes.offset.1)


###################################################
### chunk number 36: Load Bioconductor library we'll be using
###################################################
#line 448 "R_Bioconductor_Hot_Topics.Rnw"
library(DESeq)


###################################################
### chunk number 37: Create CountDataSet from preprocessed counts matrix
###################################################
#line 458 "R_Bioconductor_Hot_Topics.Rnw"
cds = newCountDataSet(counts.no0genes.offset.1, groups)


###################################################
### chunk number 38: Calculate normalization factors (2 methods)
###################################################
#line 464 "R_Bioconductor_Hot_Topics.Rnw"
# Naive way
sum.by.sample = apply(counts.no0genes.offset.1, 2, sum)
sum.by.sample
sum.by.sample/mean(sum.by.sample)
# DESeq recommendation
cds = estimateSizeFactors(cds)
sizeFactors(cds)


###################################################
### chunk number 39: calculate the variance for each gene
###################################################
#line 476 "R_Bioconductor_Hot_Topics.Rnw"
cds = estimateVarianceFunctions(cds)


###################################################
### chunk number 40: Run differential expression statistics on counts matrix using negative binomial distribution
###################################################
#line 482 "R_Bioconductor_Hot_Topics.Rnw"
results = nbinomTest(cds, "UHR", "brain")


###################################################
### chunk number 41: Look at and print results of RNA-Seq statistics
###################################################
#line 488 "R_Bioconductor_Hot_Topics.Rnw"
head(results)
counts.normalized = round(t(t(counts(cds))/sizeFactors(cds)), 2)
write.table(cbind(results, counts.normalized), file="RNA-Seq_DESeq_output.txt", 
	sep="\t", quote=F, row.names=F)


###################################################
### chunk number 42: Draw scatterplot comparing log2 counts
###################################################
#line 520 "R_Bioconductor_Hot_Topics.Rnw"
par(pty="s")
plot(log2(results$baseMeanA), log2(results$baseMeanB), main="Scatterplot",
	xlim=c(0,15), ylim=c(0,15), xlab="UHR", ylab="brain", pch=20, cex=0.5)


###################################################
### chunk number 43: Draw RNA-Seq MA plot
###################################################
#line 530 "R_Bioconductor_Hot_Topics.Rnw"
par(pty="s")
plot(log2(results$baseMean), results$log2FoldChange, main="MA plot", 
	ylab="log2(brain/UHR)", xlab="mean counts", pch=20, cex=0.5, xlim=c(0,15))


###################################################
### chunk number 44: Draw RNA-Seq volcano plot
###################################################
#line 539 "R_Bioconductor_Hot_Topics.Rnw"
par(pty="s")
plot(results$log2FoldChange, -log10(results$padj), main="Volcano plot", 
	xlab="log2(brain/UHR)", ylab="log10(FDR)", pch=20, cex=0.5)