%matplotlib notebook
import matplotlib.pyplot as plt
import gzip
from csv import reader, excel_tab
import numpy as np
# Load the processed table: the first line is the header; every later row
# contributes its first four columns to `annotations` and the remaining
# columns, converted to float, to `data`.
data = []
annotations = []
# NOTE(review): gzip.open() defaults to binary mode, which is what the
# Python 2 csv module expects.  On Python 3 the csv reader needs text, so
# pass mode "rt" there.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as handle:
    fp = reader(handle, dialect=excel_tab)
    header = next(fp)
    for row in fp:
        annotations.append(row[:4])
        data.append([float(i) for i in row[4:]])
# The with-block has already closed the file; deleting the reader just
# removes the now-useless name.
del fp
# Turn the parsed rows into arrays and take a quick look at the annotations.
anno = np.array(annotations)
anno[0]
anno.shape
d = np.array(data)

# First filter: keep rows where at least two columns reach log2(10).
thresh = np.log(10) / np.log(2)
hits = (d >= thresh).sum(axis=1)
x = hits >= 2
f = d[x, :]
fa = anno[x, :]
f.shape, fa.shape

# Second filter: keep rows whose max-minus-min spread is at least 2.
spread = f.max(axis=1) - f.min(axis=1)
x = spread >= 2
f2 = f[x, :]
fa2 = fa[x, :]
f2.shape, fa2.shape
!ls *.cdt
# Read the clustered .cdt file into a list of lines; the with-block closes
# the file as soon as the lines are in memory.
with open("clustered1_cm_centered.cdt") as handle:
    cdt = handle.readlines()
# Peek at the first three lines and the last one.
cdt[0]
cdt[1]
cdt[2]
cdt[-1]
# write() emits exactly the string it is given: no separator and no newline
# is added between calls, and non-strings must be converted with str() first.
# The with-block closes the file even if one of the writes raises.
with open("example.txt", "w") as out:
    out.write("Hello, world")
    out.write("5")
    out.write(str(5))
# Read the file back to confirm what was written.
with open("example.txt") as handle:
    contents = handle.read()
contents
# Line breaks have to be written explicitly as "\n".
with open("example2.txt", "w") as out:
    out.write("Hello\n")
    out.write("world\n")
# Read the file back once; the bare expression shows the raw string (with
# escapes) while print renders the actual line breaks.
with open("example2.txt") as handle:
    contents = handle.read()
contents
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(contents)
from csv import writer, excel_tab
# Let csv.writer handle tab-joining and line termination for each row.
# The with-block closes the file deterministically instead of relying on the
# writer object being garbage-collected.
# NOTE(review): on Python 3 the csv docs recommend opening with newline="";
# the plain "w" mode is kept here because Python 2's open() has no such
# argument.
with open("example3.txt", "w") as handle:
    out = writer(handle, dialect=excel_tab)
    out.writerow(("a", "b", "hello, world"))
    out.writerow(("1", "2", "3", "4"))
# The file is already closed; this only removes the stale writer name.
del out
sum((1, 2, 3))
# Parenthesised single-argument print behaves the same on Python 2 and 3.
with open("example3.txt") as handle:
    print(handle.read())
# Hand-rolled tab-delimited output: join the fields with "\t" and terminate
# each row with "\n".  The with-block closes the file even if a write raises.
with open("example4.txt", "w") as out:
    out.write("\t".join(("hello", "world")) + "\n")
    out.write("\t".join(("1", "3")) + "\n")
# Read the file back once; show the raw string, then the rendered text.
with open("example4.txt") as handle:
    contents = handle.read()
contents
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(contents)
# str.join on its own, for reference.
"\t".join(("hello", "world"))
# Dictionary mapping each of the four bases to its partner, built by pairing
# up the characters of two equal-length strings.
code = dict(zip("ATGC", "TACG"))
code["A"]
code["T"]
# Storing under an existing key replaces the value already there...
code.update({"T": "what?"})
code
# ...while storing under a key that is not present adds a new entry.
code.setdefault("N", "N")
code
# Show one annotation row for reference.
fa2[0]

# Index the annotation rows by their first column.  A later row replaces an
# earlier one stored under the same key, so comparing the dict size with the
# number of rows reveals whether that column contains repeats.
name_to_anno = {row[0]: row for row in fa2}
len(name_to_anno)
len(fa2)

# Same idea, keyed on the second column.
entrez_to_anno = {row[1]: row for row in fa2}
len(entrez_to_anno)

# Composite key on the first two columns.  The key has to be a tuple: the
# slice row[:2] is a NumPy array, which is unhashable and raises TypeError
# when used as a dict key.  (row[0], row[1]) and the bare row[0], row[1]
# subscript form build the same tuple, so one construction is enough.
both_to_anno = {(row[0], row[1]): row for row in fa2}
len(both_to_anno)
# NOTE: this rebinds the name `f`, which earlier in the file referred to the
# filtered data array.
def f(x):
    """Return the pair ``(x + 1, x - 1)``.

    A comma-separated return value is packed into a single tuple.
    """
    return x + 1, x - 1


f(3)
from csv import reader
# Parse the .cdt file as tab-delimited text and show only its first row.
# The with-block closes the file once the loop is left via break.
with open("clustered1_cm_centered.cdt") as handle:
    for row in reader(handle, excel_tab):
        # Parenthesised single-argument print works on Python 2 and 3.
        print(row)
        break
# Looking up a key that is present returns its value.
code["T"]
# Looking up a key that is absent, e.g. code["spam"], raises KeyError and
# would stop the script here, so it is not executed unguarded.
# Test for membership with `in`; dict.has_key() exists only on Python 2.
"spam" in code
# Attempt a lookup for a missing key and then for a present one, reporting
# the outcome of each.  Only KeyError is caught, so unrelated errors are not
# silently swallowed, and the try body holds just the lookup itself.
for key in ("spam", "T"):
    try:
        x = code[key]
    except KeyError:
        print("it didn't work =(")
    else:
        print("it worked =)")