%matplotlib notebook
import matplotlib.pyplot as plt
import gzip
from csv import reader, excel_tab
import numpy as np
# Parse the gzipped, tab-delimited expression table.  The `with` block closes
# the compressed file as soon as parsing finishes (even if a row fails to
# parse), instead of relying on `del` and garbage collection to release it.
# NOTE(review): gzip.open defaults to binary mode, which the csv reader accepts
# on Python 2 only; a Python 3 port would need mode "rt" -- confirm the target.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz_file:
    fp = reader(gz_file, dialect=excel_tab)
    # The first row is read separately from the data rows.  The builtin next()
    # replaces the Python-2-only `.next()` method and works on both versions.
    header = next(fp)
    data = []
    annotations = []
    for row in fp:
        # The first four columns of each row are kept as-is (text) ...
        annotations.append(row[:4])
        # ... and every remaining column is converted to a float.
        data.append([float(i) for i in row[4:]])
# The file is already closed here; dropping the exhausted reader just keeps the
# namespace the same as before.
del fp
anno = np.array(annotations)
anno[0]
# Two lookup tables keyed on the first annotation column:
#   d1 maps the key to the whole annotation row,
#   d2 maps the key to the row's third column only.
d1 = dict((record[0], record) for record in anno)
d2 = dict((record[0], record[2]) for record in anno)
# Spot-check both tables with one known identifier.
d1["ENSMUSG00000000001"]
d2["ENSMUSG00000000001"]
# Read the clustered CDT file into a list of lines.  The `with` block closes
# the file as soon as the lines are in memory instead of leaking the handle.
with open("clustered1_cm_centered.cdt") as cdt_file:
    cdt = cdt_file.readlines()
# Look at the second line split into its tab-separated fields, then at the
# third line, which is split and inspected below.
cdt[1].split("\t")
cdt[2]
line = cdt[2].split("\t")
# print() with a single argument prints the same text on Python 2 and 3.
print(line)
# The second field of the row is the key used to look up its annotation in
# both index dictionaries.
line[1]
d1[line[1]]
d2[line[1]]
# Slicing refresher: a list can be cut at an index and glued back together
# around a new element -- the same trick used to splice a column into a row.
x = list(range(6))
x[0:2]
x[2:len(x)]
x[0:2] + ["newdata"] + x[2:len(x)]
# A list of lists is indexed one level at a time: row first, then element.
x = [
    [0, 1, 2, 3],
    ["a", "b", "c", "d"],
]
x[0]
x[0][0]
# Quoting refresher: single quotes can hold double quotes, double quotes can
# hold an apostrophe, and triple quotes can hold both and span several lines.
'this is a "string" you know'
"Mark's code"
"""Now I can "use" both it's cool
and more
"""
def _split_row(line):
    """Split one tab-delimited line into (fields, line_terminator)."""
    body = line.rstrip("\r\n")
    return body.split("\t"), line[len(body):]


def _format_row(fields, value, terminator):
    """Join fields with `value` inserted as the fourth column, then re-attach the terminator."""
    return "\t".join(fields[:3] + [value] + fields[3:]) + terminator


def add_annotations(original, newfile, annotations):
    """Copy a CDT file, inserting one annotation column after the third column.

    Given original, the name of a CDT file in JavaTreeView extended CDT format,
    and annotations, a two dimensional array of annotations, create newfile,
    in CDT format, inserting an annotation column.

    Args:
        original: Path of the CDT file to read.  Its first line must start with
            "GID" and its second line with "EWEIGHT".
        newfile: Path of the CDT file to create (overwritten if it exists).
        annotations: Sequence of rows.  Column 0 of each row is the key matched
            against the second field of each CDT data row; column 2 is the text
            written into the new column.

    Returns:
        None.  The result is written to `newfile`.

    Raises:
        AssertionError: If the first two lines do not start with "GID" and
            "EWEIGHT" respectively.
        KeyError: If a data row's second field has no entry in `annotations`.
        StopIteration: If `original` has fewer than two lines.
    """
    # Index annotation rows by their first column; later rows win on duplicates.
    by_key = {}
    for record in annotations:
        by_key[record[0]] = record

    # Both files are managed by `with`, so the output is flushed and closed
    # (and the input released) even when a check or lookup below raises.
    with open(original) as fin, open(newfile, "w") as fout:
        # Header line: the new column is labelled "desc".
        fields, terminator = _split_row(next(fin))
        assert fields[0] == "GID"
        fout.write(_format_row(fields, "desc", terminator))

        # Weight line: the new column gets an empty cell.
        fields, terminator = _split_row(next(fin))
        assert fields[0] == "EWEIGHT"
        fout.write(_format_row(fields, "", terminator))

        # Data rows: look the annotation up by the row's second field.  The
        # terminator is split off first so that short rows still receive the
        # new column before their newline rather than after it.
        for line in fin:
            fields, terminator = _split_row(line)
            description = by_key[fields[1]][2]
            fout.write(_format_row(fields, description, terminator))
# Write the annotated copy of the clustered CDT file, then read it back to
# spot-check the result.
add_annotations("clustered1_cm_centered.cdt", "annotated.cdt", annotations)
# The `with` block closes the file once its lines are in memory instead of
# leaking the handle.
with open("annotated.cdt") as annotated_file:
    annotated = annotated_file.readlines()
# The header line and one arbitrary data line.
annotated[0]
annotated[666]