%cd ../data
import numpy as np
def clip(s):
return max(0,int(float(s)+.5))
from csv import reader, excel_tab
orfs = []
names = []
data = []
fin = reader(open("GSE88801_kallisto_est_counts_thresh10.cdt"),dialect=excel_tab)
header = next(fin)[4:]
eweights = next(fin)[4:]
for row in fin:
orfs.append(row[1])
names.append(row[2])
data.append([clip(i) for i in row[4:]])
C = np.array(data)
C.shape
from csv import reader
fp = reader(open("sample_table_v2.csv"))
sample_header = next(fp)
samples = []
name2sample = {}
for i in fp:
samples.append(i)
name2sample[i[0]] = i
sample_header
out = open("sample_table_v2.tdt","w")
out.write("\t".join(sample_header+["state"])+"\n")
for i in header:
s = name2sample[i]
out.write("\t".join(s + ["%s.%s.%s" % (s[2],s[1],s[3])])+"\n")
out.close()
out = open("GSE88801_kallisto_est_counts_thresh10.txt","w")
out.write("\t".join(["gene"]+header)+"\n")
for (name,row) in zip(names,C):
out.write("\t".join([name]+[str(i) for i in row])+"\n")
out.close()
%load_ext rpy2.ipython
%%R
library(limma)
library(edgeR)
%%R
samples <- read.delim("sample_table_v2.tdt")
print(summary(samples))
%%R
state <- samples$state
d <- model.matrix(~0+state)
colnames(d) <- gsub("state","",colnames(d))
print(colnames(d))
%%R
C <- read.delim("GSE88801_kallisto_est_counts_thresh10.txt",row.names=1)
dge <- DGEList(counts=C)
dge <- calcNormFactors(dge)
v <- voom(dge, d, plot = TRUE)
%%R
fit <- lmFit(v, d)
contrast.matrix <- makeContrasts(# S1 contrast
BMDM.uninfected.24-BMDM.uninfected.4,
# S2 contrasts
BMDM.Live.4-BMDM.uninfected.4,BMDM.Dead.4-BMDM.uninfected.4,
BMDM.Live.24-BMDM.uninfected.24,BMDM.Dead.24-BMDM.uninfected.24,
J774.Live.4-J774.uninfected.4,J774.Dead.4-J774.uninfected.4,
J774.Live.24-J774.uninfected.24,J774.Dead.24-J774.uninfected.24,
levels=d)
fit2 <- contrasts.fit(fit, contrast.matrix)
fit2 <- eBayes(fit2)
print(summary(decideTests(fit2)))
%%R
print(summary(decideTests(fit2,lfc=1)))
%%R
print(topTable(fit2))
%%R
print(colnames(fit2$coefficients))
%%R
print(topTable(fit2, coef="BMDM.uninfected.24 - BMDM.uninfected.4", lfc=1, p.value = .05))
%%R
for(tc in colnames(fit2$coefficients)){
print(tc)
# Extract all genes significantly differential on this contrast for a 2x fold change cutoff and 5% FDR
# Use write.csv rather than write.table for clean compatibility with python's csv.reader
write.csv(topTable(fit2, coef=tc, n = 40000, lfc=1, p.value = .05),
paste("limma1.",gsub(" ","",tc),".t0.csv",sep=""))
# Extract the adjusted p-values for this contrast for all genes, independent of significance
write.csv(topTable(fit2, coef=tc, n = 40000),
paste("limma1.",gsub(" ","",tc),".t1.csv",sep=""))
}