from urllib import urlretrieve

# Download the gzipped expression table into the working directory.
# The call returns a (local filename, response headers) pair.
urlretrieve(
    url="http://histo.ucsf.edu/BMS270/BMS270_2017/data/GSE86922_Brodsky_GEO_processed.txt.gz",
    filename="GSE86922_Brodsky_GEO_processed.txt.gz",
)

('GSE86922_Brodsky_GEO_processed.txt.gz',
 <httplib.HTTPMessage instance at 0x7f88f02dfea8>)

# IPython line magic: list the .gz files in the working directory to
# confirm that the download is present.
%ls *.gz

GSE86922_Brodsky_GEO_processed.txt.gz

# Peek at the first 10 raw bytes of the download.  The file is compressed
# binary data, so open it in binary mode, and let the `with` block close
# the handle instead of leaving that to the garbage collector.
with open("GSE86922_Brodsky_GEO_processed.txt.gz", "rb") as raw:
    magic = raw.read(10)
# The first two bytes (0x1f 0x8b) are the gzip signature.
magic

"\x1f\x8b\x08\x08'\x14\xd8W\x00\x03"

import gzip

# Open the download through gzip so that reads return decompressed data.
# The handle stays open because the following cells read from it.
fp = gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz")

# Read the first 10 bytes of the decompressed stream (the start of the
# header line).
fp.read(10)

'EnsemblID\t'

# Read just the header line from a fresh handle.  The `with` block closes
# the handle afterwards (GzipFile is a context manager from Python 2.7 on);
# the original one-liner left an anonymous handle open.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz:
    first_line = gz.readline()
first_line

'EnsemblID\tEntrezID\tGeneName\tGeneSymbol\tWT_unstim_rep1\tWT_unstim_rep2\tRipk3_unstim_rep1\tRipk3_unstim_rep2\tRipk3Casp8_unstim_rep1\tRipk3Casp8_unstim_rep2\tWT_LPS.6hr_rep1\tWT_LPS.6hr_rep2\tRipk3_LPS.6hr_rep1\tRipk3_LPS.6hr_rep2\tRipk3Casp8_LPS.6hr_rep1\tRipk3Casp8_LPS.6hr_rep2\n'

# Re-open the file so that iteration starts again from the header line.
# NOTE(review): the handle previously bound to fp is not closed explicitly.
fp = gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz")

# Fetch the next line (here, the header) from the file iterator.  The
# next() builtin is the portable spelling: fp.next() exists only on
# Python 2 and fp.__next__() only on Python 3.
next(fp)

'EnsemblID\tEntrezID\tGeneName\tGeneSymbol\tWT_unstim_rep1\tWT_unstim_rep2\tRipk3_unstim_rep1\tRipk3_unstim_rep2\tRipk3Casp8_unstim_rep1\tRipk3Casp8_unstim_rep2\tWT_LPS.6hr_rep1\tWT_LPS.6hr_rep2\tRipk3_LPS.6hr_rep1\tRipk3_LPS.6hr_rep2\tRipk3Casp8_LPS.6hr_rep1\tRipk3Casp8_LPS.6hr_rep2\n'

# Each call advances the iterator by one line; this one returns the first
# gene record.  next() works on both Python 2.6+ and Python 3, unlike
# fp.next() / fp.__next__().
next(fp)

'ENSMUSG00000000001\t14679\tguanine nucleotide binding protein (G protein), alpha inhibiting 3\tGnai3\t8.095852358\t8.219151897\t8.116617373\t8.127604792\t7.934172365\t8.028072233\t7.964154595\t7.910907456\t7.870409519\t7.842172144\t7.798445892\t7.831022299\n'

# Advance to the following gene record.  next() works on both
# Python 2.6+ and Python 3, unlike fp.next() / fp.__next__().
next(fp)

'ENSMUSG00000000003\t54192\tprobasin\tPbsn\t-5.053418441\t-5.109382793\t-5.52780226\t-5.777688187\t-4.729163359\t-5.379859359\t-5.463289207\t-4.84042738\t-4.92583293\t-5.147399754\t-5.645921977\t-5.307409508\n'

# Print the next record so the tabs are rendered rather than shown as "\t".
# The parenthesised single-argument print and the next() builtin behave the
# same on Python 2 and are also valid on Python 3.
print(next(fp))

ENSMUSG00000000028	12544	cell division cycle 45	Cdc45	4.845938482	4.76866812	4.859137985	4.927080052	4.595017188	4.610244605	1.867627671	1.979751582	1.815634057	1.328333676	1.961408337	1.930995232

# Split the next record on runs of whitespace (the split() default).
# next() replaces the Python 2-only fp.next().
next(fp).split()

['ENSMUSG00000000031',
 'NA',
 'NA',
 'NA',
 '-5.053418441',
 '-5.109382793',
 '-3.942839759',
 '-4.192725686',
 '-4.729163359',
 '-3.057931264',
 '-5.463289207',
 '-4.84042738',
 '-4.92583293',
 '-5.147399754',
 '-5.645921977',
 '-5.307409508']

# Whitespace splitting again.  The displayed result shows why it is the
# wrong tool here: a gene name containing spaces is broken into several
# fields.  next() replaces the Python 2-only fp.next().
next(fp).split()

['ENSMUSG00000000037',
 '107815',
 'sex',
 'comb',
 'on',
 'midleg-like',
 '2',
 '(Drosophila)',
 'Scml2',
 '-0.409562252',
 '-0.251401798',
 '-0.035949164',
 '-0.049767732',
 '-0.336845936',
 '0.046405396',
 '-1.375826366',
 '-3.255464879',
 '-1.755907928',
 '-2.82547166',
 '-2.838567055',
 '-1.60696979']

# Split on tab characters only, so multi-word gene names stay in one field.
# The last field still carries the line's trailing "\n".
# next() replaces the Python 2-only fp.next().
next(fp).split("\t")

['ENSMUSG00000000049',
 '11818',
 'apolipoprotein H',
 'Apoh',
 '-5.053418441',
 '-5.109382793',
 '-5.52780226',
 '-5.777688187',
 '-4.729163359',
 '-5.379859359',
 '-5.463289207',
 '-4.84042738',
 '-4.92583293',
 '-3.562437254',
 '-5.645921977',
 '-5.307409508\n']

# Split on tabs, then drop the final character of the last field to get
# rid of the newline.  next() replaces the Python 2-only fp.next().
# NOTE: the slice removes one character unconditionally, so it is only
# correct when the line ends in exactly one "\n".
a = next(fp).split("\t")
a[-1] = a[-1][:-1]
a

['ENSMUSG00000000056',
 '67608',
 'nuclear prelamin A recognition factor',
 'Narf',
 '5.46916309',
 '5.502564149',
 '5.441584261',
 '5.558260378',
 '5.505654072',
 '5.428300414',
 '2.872101148',
 '3.231034982',
 '2.808876691',
 '2.765489582',
 '2.809405244',
 '2.973361262']

# Strip any trailing "\r" / "\n" characters first, then split on tabs.
# Unlike slicing off one character, this is safe whatever the line ending.
# next() replaces the Python 2-only fp.next().
next(fp).rstrip("\r\n").split("\t")

['ENSMUSG00000000078',
 '23849',
 'Kruppel-like factor 6',
 'Klf6',
 '8.874266942',
 '8.884351708',
 '8.889720654',
 '8.858426597',
 '8.878975932',
 '8.812048069',
 '10.37672646',
 '10.36532769',
 '10.40665824',
 '10.4019337',
 '10.28728456',
 '10.31014405']

from csv import reader,excel_tab

import csv

# Wrap the decompressed file in a csv reader using the tab-delimited
# dialect; iterating r yields each line as a list of strings.
r = reader(gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz"),
           dialect = excel_tab)

# First row from the reader: the column headers, already split into a list.
# The next() builtin works on Python 2.6+ and 3; r.next() is Python 2 only.
next(r)

['EnsemblID',
 'EntrezID',
 'GeneName',
 'GeneSymbol',
 'WT_unstim_rep1',
 'WT_unstim_rep2',
 'Ripk3_unstim_rep1',
 'Ripk3_unstim_rep2',
 'Ripk3Casp8_unstim_rep1',
 'Ripk3Casp8_unstim_rep2',
 'WT_LPS.6hr_rep1',
 'WT_LPS.6hr_rep2',
 'Ripk3_LPS.6hr_rep1',
 'Ripk3_LPS.6hr_rep2',
 'Ripk3Casp8_LPS.6hr_rep1',
 'Ripk3Casp8_LPS.6hr_rep2']

# Next row: the first gene record, split on tabs with the newline removed.
# The next() builtin works on Python 2.6+ and 3; r.next() is Python 2 only.
next(r)

['ENSMUSG00000000001',
 '14679',
 'guanine nucleotide binding protein (G protein), alpha inhibiting 3',
 'Gnai3',
 '8.095852358',
 '8.219151897',
 '8.116617373',
 '8.127604792',
 '7.934172365',
 '8.028072233',
 '7.964154595',
 '7.910907456',
 '7.870409519',
 '7.842172144',
 '7.798445892',
 '7.831022299']

# Keep the next record in `row` for the cells that follow.
# The next() builtin works on Python 2.6+ and 3; r.next() is Python 2 only.
row = next(r)

# Show what kind of object the csv reader hands back for each record.
type(row)

list

# First field of the record (the EnsemblID column).
row[0]

'ENSMUSG00000000003'

# The first four columns (EnsemblID, EntrezID, GeneName, GeneSymbol) are
# annotations; the slice makes a new list, so row itself is left unchanged
# when fields is modified later.
fields = row[:4]
fields

['ENSMUSG00000000003', '54192', 'probasin', 'Pbsn']

# Convert the remaining columns from text to floats and add them after the
# four annotation fields.
fields.extend(float(text) for text in row[4:])
fields

['ENSMUSG00000000003',
 '54192',
 'probasin',
 'Pbsn',
 -5.053418441,
 -5.109382793,
 -5.52780226,
 -5.777688187,
 -4.729163359,
 -5.379859359,
 -5.463289207,
 -4.84042738,
 -4.92583293,
 -5.147399754,
 -5.645921977,
 -5.307409508]

# Parse the gzipped, tab-delimited expression table into two parallel
# lists: one of annotation columns and one of numerical values per gene.
# The `with` block closes the gzip handle as soon as parsing is finished
# (GzipFile is a context manager from Python 2.7 on); previously the
# handle was left open.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz:
    # Read the decompressed lines as a sequence of lists
    r = reader(gz, dialect = excel_tab)
    # Extract the column headers (next() instead of the Python 2-only
    # r.next() method)
    header = next(r)
    # This will be a table of gene annotations
    annotations = []
    # This will be a table of numerical values
    data = []
    # Parse annotations and values
    for row in r:
        # First four columns are annotations
        annotations.append(row[:4])
        # Remaining columns are values; float() raises ValueError on any
        # cell that is not numeric
        cells = [float(i) for i in row[4:]]
        data.append(cells)

# Parse only the first five gene records into data2, a nested list with one
# list of floats per gene.
# The `with` block closes the gzip handle once all rows have been read
# (GzipFile is a context manager from Python 2.7 on); previously the
# handle was left open.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz:
    # Read the decompressed lines as a sequence of lists
    r = reader(gz, dialect = excel_tab)
    # Extract the column headers (next() instead of the Python 2-only
    # r.next() method)
    header = next(r)
    # Materialise every remaining row while the file is still open
    rest = list(r)
# This will be a table of gene annotations
# NOTE(review): this rebinds `annotations` to just these five rows, so it
# no longer lines up row-for-row with a previously parsed `data`.
annotations = []
# This will be a table of numerical values
data2 = []
# Parse annotations and values
for row in rest[:5]:
    # First four columns are annotations
    annotations.append(row[:4])
    # Remaining columns are values, converted from text to float
    cells = [float(i) for i in row[4:]]
    data2.append(cells)

# Parse only the first five gene records into data3, a single flat list.
# Unlike a nested table, the row boundaries are lost, and the values are
# kept as the strings read from the file (no float conversion).
# The `with` block closes the gzip handle once all rows have been read
# (GzipFile is a context manager from Python 2.7 on); previously the
# handle was left open.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz:
    # Read the decompressed lines as a sequence of lists
    r = reader(gz, dialect = excel_tab)
    # Extract the column headers (next() instead of the Python 2-only
    # r.next() method)
    header = next(r)
    # Materialise every remaining row while the file is still open
    rest = list(r)
# This will be a table of gene annotations
# NOTE(review): this rebinds `annotations` to just these five rows, so it
# no longer lines up row-for-row with a previously parsed `data`.
annotations = []
# This will be a flat list of value strings
data3 = []
# Parse annotations and values
for row in rest[:5]:
    # First four columns are annotations
    annotations.append(row[:4])
    # Remaining columns are appended one after another, unconverted
    data3.extend(row[4:])

# Show the nested table: one inner list of floats per gene.  The
# parenthesised single-argument print gives the same output on Python 2
# and is also valid on Python 3.
print(data2)

[[8.095852358, 8.219151897, 8.116617373, 8.127604792, 7.934172365, 8.028072233, 7.964154595, 7.910907456, 7.870409519, 7.842172144, 7.798445892, 7.831022299], [-5.053418441, -5.109382793, -5.52780226, -5.777688187, -4.729163359, -5.379859359, -5.463289207, -4.84042738, -4.92583293, -5.147399754, -5.645921977, -5.307409508], [4.845938482, 4.76866812, 4.859137985, 4.927080052, 4.595017188, 4.610244605, 1.867627671, 1.979751582, 1.815634057, 1.328333676, 1.961408337, 1.930995232], [-5.053418441, -5.109382793, -3.942839759, -4.192725686, -4.729163359, -3.057931264, -5.463289207, -4.84042738, -4.92583293, -5.147399754, -5.645921977, -5.307409508], [-0.409562252, -0.251401798, -0.035949164, -0.049767732, -0.336845936, 0.046405396, -1.375826366, -3.255464879, -1.755907928, -2.82547166, -2.838567055, -1.60696979]]

# Show the flat list of value strings for comparison with data2.  The
# parenthesised single-argument print gives the same output on Python 2
# and is also valid on Python 3.
print(data3)

['8.095852358', '8.219151897', '8.116617373', '8.127604792', '7.934172365', '8.028072233', '7.964154595', '7.910907456', '7.870409519', '7.842172144', '7.798445892', '7.831022299', '-5.053418441', '-5.109382793', '-5.52780226', '-5.777688187', '-4.729163359', '-5.379859359', '-5.463289207', '-4.84042738', '-4.92583293', '-5.147399754', '-5.645921977', '-5.307409508', '4.845938482', '4.76866812', '4.859137985', '4.927080052', '4.595017188', '4.610244605', '1.867627671', '1.979751582', '1.815634057', '1.328333676', '1.961408337', '1.930995232', '-5.053418441', '-5.109382793', '-3.942839759', '-4.192725686', '-4.729163359', '-3.057931264', '-5.463289207', '-4.84042738', '-4.92583293', '-5.147399754', '-5.645921977', '-5.307409508', '-0.409562252', '-0.251401798', '-0.035949164', '-0.049767732', '-0.336845936', '0.046405396', '-1.375826366', '-3.255464879', '-1.755907928', '-2.82547166', '-2.838567055', '-1.60696979']

# Number of gene rows parsed into the full table.
len(data)

44340

# Number of values in the first row, i.e. the number of sample columns.
len(data[0])

12

%matplotlib notebook
import matplotlib.pyplot as plt
# from IPython.core.display import display

# Annotation fields of the first gene record.
annotations[0]

['ENSMUSG00000000001',
 '14679',
 'guanine nucleotide binding protein (G protein), alpha inhibiting 3',
 'Gnai3']

# Line plot of the first gene's values on a new figure; with a single
# argument, the x axis is the position of each value in the list.
fig = plt.figure()
plt.plot(data[0])

[<matplotlib.lines.Line2D at 0x7f88c88f4c50>]

# Same values on a new figure, drawn as unconnected blue circles (the "bo"
# format string) instead of a line.
fig = plt.figure()
plt.plot(data[0],"bo")

[<matplotlib.lines.Line2D at 0x7f88c89a1b10>]

# Collect the first value of every gene row, i.e. the first sample column
# of the table, then report how many values were gathered.
sample1 = [gene_values[0] for gene_values in data]
len(sample1)

44340

# Line plot of the first sample column across all genes, in file order.
fig = plt.figure()
plt.plot(sample1)

[<matplotlib.lines.Line2D at 0x7f88c827c0d0>]

# Histogram of the first sample column.  Assigning the return value of
# plt.hist to h keeps it from being echoed as the cell output.
fig = plt.figure()
h = plt.hist(sample1)

# Plot the first sample column after sorting its values in ascending order
# (sorted() returns a new list; sample1 itself is left unchanged).
fig = plt.figure()
plt.plot(sorted(sample1))

[<matplotlib.lines.Line2D at 0x7f88c6e1aa90>]

# Plot the first gene's values (x) against the second gene's values (y),
# one blue circle per sample column.
fig = plt.figure()
plt.plot(data[0],data[1],"bo")

[<matplotlib.lines.Line2D at 0x7f88c6b8d710>]

# Render the whole table as an image: one image row per gene and one image
# column per sample, using imshow's default settings.
fig = plt.figure()
plt.imshow(data)

<matplotlib.image.AxesImage at 0x7f88c6787150>

# Same image, but aspect="auto" lets it stretch to fill the axes, which
# matters because the table has far more rows than columns.
fig = plt.figure()
plt.imshow(data, aspect = "auto")

<matplotlib.image.AxesImage at 0x7f88c60ced10>

# Same image with interpolation turned off, plus a colorbar that maps the
# colours back to the numerical values.
fig = plt.figure()
plt.imshow(data, interpolation = "none", aspect = "auto")
plt.colorbar()

<matplotlib.colorbar.Colorbar at 0x7f88c4793390>