%matplotlib notebook
import matplotlib.pyplot as plt

import gzip
from csv import reader, excel_tab
import numpy as np

# Parse the gzipped, tab-delimited table. The first row is kept as `header`;
# for every following row the first four columns are stored as annotation
# strings and the remaining columns are converted to floats.
# The `with` block closes the gzip file as soon as reading finishes (or fails),
# instead of relying on garbage collection of the reader object.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as _gz_handle:
    fp = reader(_gz_handle, dialect = excel_tab)
    # next(fp) works on both Python 2.6+ and Python 3, unlike fp.next()
    header = next(fp)
    data = []
    annotations = []
    for row in fp:
        annotations.append(row[:4])
        data.append([float(i) for i in row[4:]])
# The reader is no longer usable once its file is closed, so drop the name
del fp

anno = np.array(annotations)
anno[0]

array(['ENSMUSG00000000001', '14679',
       'guanine nucleotide binding protein (G protein), alpha inhibiting 3',
       'Gnai3'], 
      dtype='|S139')

anno.shape

(44340, 4)

# First filter: keep the rows of `d` (and the matching rows of `anno`) in
# which at least two columns reach the threshold log(10)/log(2).
d = np.array(data)
thresh = np.log(10) / np.log(2)

# Per-row count of columns at or above the threshold, turned into a row mask
x = (d >= thresh).sum(axis=1) >= 2
f, fa = d[x], anno[x]
f.shape, fa.shape

((9740, 12), (9740, 4))

# Second filter: keep rows whose spread (row maximum minus row minimum) is
# at least 2, again applying the same mask to the annotations.
x = (f.max(axis=1) - f.min(axis=1)) >= 2
f2, fa2 = f[x], fa[x]
f2.shape, fa2.shape

((2779, 12), (2779, 4))

!ls *.cdt

clustered1_cm.cdt	    clustered1_cm_scaled.cdt
clustered1_cm_centered.cdt  est_counts.cdt

# Read every line of the clustered table into a list (newlines are kept).
# The `with` block closes the file right after reading.
with open("clustered1_cm_centered.cdt") as _cdt_file:
    cdt = _cdt_file.readlines()

cdt[0]

'GID\tUNIQID\tNAME\tGWEIGHT\tWT_unstim_rep1\tWT_unstim_rep2\tRipk3_unstim_rep1\tRipk3_unstim_rep2\tRipk3Casp8_unstim_rep1\tRipk3Casp8_unstim_rep2\tWT_LPS.6hr_rep1\tWT_LPS.6hr_rep2\tRipk3_LPS.6hr_rep1\tRipk3_LPS.6hr_rep2\tRipk3Casp8_LPS.6hr_rep1\tRipk3Casp8_LPS.6hr_rep2\n'

cdt[1]

'EWEIGHT\t\t\t\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\t1.000000\n'

cdt[2]

'GENE520X\tENSMUSG00000022221\tRipk3\t1.000000\t2.19086966825\t2.00234084225\t0.17825439925\t0.00667346725\t-0.05213574775\t-0.00872163275\t0.48554301925\t0.26040793925\t-0.85146982175\t-1.14610010875\t-1.72158431275\t-1.34407771175\n'

cdt[-1]

'GENE2467X\tENSMUSG00000069049\tEif2s3y\t1.000000\t-2.79573406792\t-2.66825227892\t1.89912772008\t1.98051739808\t0.933590345083\t1.02446919208\t-3.30688816992\t-3.10783405092\t2.00283768808\t1.99665289108\t1.07452459408\t0.966988739083\n'

# Write three pieces of text to example.txt. write() adds no separators or
# newlines of its own, and it needs strings -- hence str(5) for the number.
# The `with` block guarantees the file is flushed and closed even if one of
# the writes fails; `out` is left bound to the closed file afterwards.
with open("example.txt","w") as out:
    out.write("Hello, world")
    out.write("5")
    out.write(str(5))

open("example.txt").read()

'Hello, world55'

# Write two lines to example2.txt; each newline has to be written explicitly.
# The `with` block closes the file even if a write fails.
with open("example2.txt","w") as out:
    out.write("Hello\n")
    out.write("world\n")

open("example2.txt").read()

'Hello\nworld\n'

# Print the file contents so the newlines are rendered rather than escaped.
# print(...) with a single argument behaves the same on Python 2 and 3, and
# the `with` block closes the file after reading.
with open("example2.txt") as _handle:
    print(_handle.read())

Hello
world

from csv import writer, excel_tab

# Write two tab-delimited rows to example3.txt through a csv writer.
# The file is held in a `with` block so it is flushed and closed
# deterministically; deleting the writer alone only closes the file as a
# side effect of garbage collection.
with open("example3.txt","w") as _handle:
    out = writer(_handle, dialect = excel_tab)
    out.writerow(("a","b","hello, world"))
    out.writerow(("1","2","3","4"))
# The writer cannot be used once its file is closed, so drop the name
del out

sum((1,2,3))

6

# Print the csv-written file. print(...) with a single argument behaves the
# same on Python 2 and 3, and the `with` block closes the file after reading.
with open("example3.txt") as _handle:
    print(_handle.read())

a	b	hello, world
1	2	3	4

# Build each tab-delimited line by hand with str.join and write it to
# example4.txt. The `with` block closes the file even if a write fails.
with open("example4.txt","w") as out:
    out.write("\t".join(("hello","world"))+"\n")
    out.write("\t".join(("1","3"))+"\n")

open("example4.txt").read()

'hello\tworld\n1\t3\n'

# Print the hand-joined file. print(...) with a single argument behaves the
# same on Python 2 and 3, and the `with` block closes the file after reading.
with open("example4.txt") as _handle:
    print(_handle.read())

hello	world
1	3

"\t".join(("hello","world"))

'hello\tworld'

code = {"A":"T","T":"A","G":"C","C":"G"}

code["A"]

'T'

code["T"]

'A'

code["T"] = "what?"

code

{'A': 'T', 'C': 'G', 'G': 'C', 'T': 'what?'}

code["N"] = "N"

code

{'A': 'T', 'C': 'G', 'G': 'C', 'N': 'N', 'T': 'what?'}

fa2[0]

array(['ENSMUSG00000000028', '12544', 'cell division cycle 45', 'Cdc45'], 
      dtype='|S139')

# Index the annotation rows by their first column. Rows that share a key
# overwrite one another (the last one wins), so the dict can end up with
# fewer entries than fa2 has rows.
name_to_anno = {row[0]: row for row in fa2}

len(name_to_anno)

2734

len(fa2)

2779

# Index the annotation rows by their second column. Rows that share a key
# overwrite one another (the last one wins), so the dict can end up with
# fewer entries than fa2 has rows.
entrez_to_anno = {row[1]: row for row in fa2}

len(entrez_to_anno)

2647

# Try to key each row on its first two columns together.
# row[:2] is a NumPy array slice, which is not hashable, so the assignment
# below raises TypeError (this cell demonstrates the failure on purpose).
both_to_anno = {}
for row in fa2:
    both_to_anno[row[:2]] = row

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-53-375164faaf66> in <module>()
      1 both_to_anno = {}
      2 for row in fa2:
----> 3     both_to_anno[row[:2]] = row

TypeError: unhashable type: 'numpy.ndarray'

# Key each row on its first two columns by building an explicit tuple:
# unlike an array slice, a tuple of the two values is hashable.
both_to_anno = {}
for row in fa2:
    both_to_anno[(row[0],row[1])] = row

# Same mapping again: a comma inside the subscript already builds the tuple
# key, so the surrounding parentheses are optional.
both_to_anno = {}
for row in fa2:
    both_to_anno[row[0],row[1]] = row

len(both_to_anno)

2779

def f(x):
    """Return the two-element tuple ``(x + 1, x - 1)``."""
    above = x + 1
    below = x - 1
    return (above, below)

f(3)

(4, 2)

from csv import reader

# Show only the first parsed row of the tab-delimited .cdt file.
# The `with` block closes the file once the row has been printed, and
# print(...) with a single argument behaves the same on Python 2 and 3.
with open("clustered1_cm_centered.cdt") as _handle:
    for row in reader(_handle, excel_tab):
        print(row)
        break

['GID', 'UNIQID', 'NAME', 'GWEIGHT', 'WT_unstim_rep1', 'WT_unstim_rep2', 'Ripk3_unstim_rep1', 'Ripk3_unstim_rep2', 'Ripk3Casp8_unstim_rep1', 'Ripk3Casp8_unstim_rep2', 'WT_LPS.6hr_rep1', 'WT_LPS.6hr_rep2', 'Ripk3_LPS.6hr_rep1', 'Ripk3_LPS.6hr_rep2', 'Ripk3Casp8_LPS.6hr_rep1', 'Ripk3Casp8_LPS.6hr_rep2']

code["T"]

'what?'

code["spam"]

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-66-389726eb53ff> in <module>()
----> 1 code["spam"]

KeyError: 'spam'

# Membership test with `in`: same True/False answer as dict.has_key(), which
# is deprecated and no longer exists in Python 3.
"spam" in code

False

# Look up a key that is not in `code`. Only KeyError counts as the
# "didn't work" case, so unrelated errors are not silently swallowed, and
# the success message sits in `else` to keep the try body to the lookup.
try:
    x = code["spam"]
except KeyError:
    print("it didn't work =(")
else:
    print("it worked =)")

it didn't work =(

# Look up a key that is present in `code`. Only KeyError counts as the
# "didn't work" case, so unrelated errors are not silently swallowed, and
# the success message sits in `else` to keep the try body to the lookup.
try:
    x = code["T"]
except KeyError:
    print("it didn't work =(")
else:
    print("it worked =)")

it worked =)