# IPython magic: switch the working directory to ../data so that the relative
# file names used by the downloads, open() calls and R cells below resolve there.
%cd ../data
import numpy as np
def clip(s):
    """Convert *s* to a non-negative integer count.

    *s* is anything ``float()`` accepts (a numeric string or a number).
    The value is shifted by one half and truncated, so positive values
    round to the nearest integer with halves going up; any result below
    zero is replaced by 0.
    """
    rounded = int(float(s) + .5)
    return rounded if rounded > 0 else 0
# Shell escape: download the .cdt count table that is parsed below into the
# current working directory.
!wget 'http://histo.ucsf.edu/BMS270/BMS270_2019/data/GSE88801_kallisto_est_counts_thresh10.cdt'
from csv import reader, excel_tab
# Parse the tab-delimited .cdt table into parallel lists:
#   orfs   - column 1 of every data row
#   names  - column 2 of every data row
#   data   - columns 4.. of every data row, converted with clip()
# The first two rows are not data: their columns 4.. are kept as `header`
# (the per-column labels) and `eweights`.
# NOTE(review): column meanings beyond the variable names are not visible
# here -- presumably the usual CDT layout; confirm against the file.
orfs = []
names = []
data = []
# Use a context manager so the handle is closed even if a row fails to parse;
# newline="" is what the csv module asks for when handing it a file object.
with open("GSE88801_kallisto_est_counts_thresh10.cdt", newline="") as cdt_file:
    fin = reader(cdt_file, dialect=excel_tab)
    header = next(fin)[4:]
    eweights = next(fin)[4:]
    for row in fin:
        orfs.append(row[1])
        names.append(row[2])
        data.append([clip(i) for i in row[4:]])
C[0,:10],C[-1,:10]
!diff -q sample_table.csv sample_table_v2.csv
C = np.array(data)
C.shape
# Shell escapes: list sample_table_v2.csv (reports an error if it is not
# there yet), then download it into the current working directory.
!ls sample_table_v2.csv
!wget 'http://histo.ucsf.edu/BMS270/BMS270_2019/data/sample_table_v2.csv'
# Read the sample table: the first row becomes `sample_header`, every later
# row is kept in file order in `samples` and indexed by its first column in
# `name2sample`.
samples = []
name2sample = {}
# Context manager closes the handle deterministically; newline="" is what the
# csv module asks for when handing it a file object.
with open("sample_table_v2.csv", newline="") as sample_file:
    fp = reader(sample_file)
    sample_header = next(fp)
    for i in fp:
        if not i:
            # csv.reader yields [] for blank lines (e.g. a trailing newline);
            # skip them instead of failing on i[0].
            continue
        samples.append(i)
        name2sample[i[0]] = i
# Inspection only: show the first lines of the raw sample table and the first
# five column labels taken from the .cdt header row.
!head sample_table_v2.csv
header[:5]
# Write a tab-delimited copy of the sample table with one extra "state"
# column, emitting rows in the order of `header` so that row k of this file
# describes count column k.  "state" joins fields 2, 1 and 3 of the sample
# row with dots.
# The with-block guarantees the file is flushed and closed even if a label in
# `header` is missing from `name2sample` (KeyError) part-way through.
with open("sample_table_v2.tdt", "w") as out:
    out.write("\t".join(sample_header + ["state"]) + "\n")
    for i in header:
        s = name2sample[i]
        out.write("\t".join(s + ["%s.%s.%s" % (s[2], s[1], s[3])]) + "\n")
# Inspection only: show the first lines of the tab-delimited sample table.
!head sample_table_v2.tdt
# Write the integer count matrix as tab-delimited text: a header line of
# "gene" followed by the column labels, then one line per row of C prefixed
# with the matching entry of `names`.
# The with-block guarantees the file is flushed and closed even if a write
# fails part-way through.
with open("GSE88801_kallisto_est_counts_thresh10.txt", "w") as out:
    out.write("\t".join(["gene"] + header) + "\n")
    for (name, row) in zip(names, C):
        out.write("\t".join([name] + [str(i) for i in row]) + "\n")
# Load the rpy2 IPython extension; it provides the %%R cell magic used below.
%load_ext rpy2.ipython
%%R
# Attach the two R packages whose functions the following R cells call
# (voom from limma; DGEList and calcNormFactors from edgeR).
library(limma)
library(edgeR)
%%R
# Read the tab-delimited sample table (including its "state" column) into a
# data frame and print a per-column summary as a sanity check.
samples <- read.delim("sample_table_v2.tdt")
print(summary(samples))
%%R
# Build the design matrix from the "state" column.  The formula ~0+state
# drops the intercept, giving one indicator column per distinct state.
state <- samples$state
d <- model.matrix(~0+state)
# model.matrix prefixes each column with the variable name; remove it so the
# columns are named by the state value alone.
# NOTE(review): gsub removes EVERY occurrence of the text "state", not only
# the leading prefix -- confirm no state value itself contains "state".
colnames(d) <- gsub("state","",colnames(d))
print(colnames(d))
%%R
# Read the count table, using its first column (the "gene" labels) as row
# names.
# NOTE(review): read.delim rejects duplicated row names -- confirm the gene
# labels are unique.
C <- read.delim("GSE88801_kallisto_est_counts_thresh10.txt",row.names=1)
# Wrap the counts in an edgeR DGEList and compute normalization factors with
# calcNormFactors' default settings.
dge <- DGEList(counts=C)
dge <- calcNormFactors(dge)
# Run limma's voom with design matrix d; plot = TRUE also draws its
# diagnostic plot.
# NOTE(review): this relies on the rows of d being in the same order as the
# columns of C -- both files are expected to have been written in the same
# column-label order; confirm.
v <- voom(dge, d, plot = TRUE)
# Scratch demonstration of sets and conditionals.  The duplicate "C" is
# collapsed, so the set holds three elements.  Note that this rebinds the
# module-level name `s`.
s = {"A", "b", "C", "C"}
s
# Membership tests (shown as cell output in a notebook).
"A" in s
"E" in s
# A condition that is always true: prints "hi".
print("hi" if 1 == 1 else "there")
# A condition on set membership: "A" is present, so this prints "hi" too.
print("hi" if "A" in s else "there")