from urllib import urlretrieve
# Download the gzipped data table and save it under the same file name
# in the current working directory.
urlretrieve(
    "http://histo.ucsf.edu/BMS270/BMS270_2017/data/GSE86922_Brodsky_GEO_processed.txt.gz",
    "GSE86922_Brodsky_GEO_processed.txt.gz",
)
# IPython magic (not plain Python): list the .gz files in the working directory.
%ls *.gz
# Read the first 10 characters through the plain built-in open(), i.e. without
# going through gzip, so this shows the file's raw on-disk content.
open("GSE86922_Brodsky_GEO_processed.txt.gz").read(10)
import gzip
# Same file, this time read through gzip: the first 10 characters of the
# decompressed content.
fp = gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz")
fp.read(10)
# A freshly opened handle starts at the top, so this is the first full line.
gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz").readline()
# Re-open and step through the file one line per call.  The built-in next()
# is equivalent to calling the file object's .next() method directly.
fp = gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz")
next(fp)
next(fp)
next(fp)
print(next(fp))
# Split a line on any run of whitespace ...
next(fp).split()
next(fp).split()
# ... versus on tab characters only.
next(fp).split("\t")
# Split on tabs, then drop the final character of the last field.
a = next(fp).split("\t")
a[-1] = a[-1][:-1]
a
# Strip trailing carriage-return / newline characters before splitting on tabs.
next(fp).rstrip("\r\n").split("\t")
from csv import reader,excel_tab
import csv
# Wrap the decompressed stream in a csv reader using the tab-delimited
# (excel_tab) dialect; each item it yields is one parsed row.
r = reader(gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz"),
           dialect=excel_tab)
# Discard the first two rows and keep the third for inspection.
next(r)
next(r)
row = next(r)
row.__class__
row[0]
# Start from the first four columns, kept exactly as read ...
fields = row[:4]
fields
# ... then append every remaining column converted to float.
for i in row[4:]:
    fields.append(float(i))
fields
# Open gzipped, tab-delimited text file for reading as
# a sequence of lists.  The whole parse runs inside the ``with`` block so
# the file is closed as soon as the last row has been read.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz_file:
    r = reader(gz_file, dialect=excel_tab)
    # Extract the column headers
    header = next(r)
    # This will be a table of gene annotations
    annotations = []
    # This will be a table of numerical values
    data = []
    # Parse annotations and values
    for row in r:
        # First four columns are annotations
        annotations.append(row[:4])
        # Every remaining column is converted from text to float
        cells = [float(i) for i in row[4:]]
        data.append(cells)
# Open gzipped, tab-delimited text file for reading as
# a sequence of lists.  All rows are pulled into memory inside the
# ``with`` block, so the file can be closed before they are processed.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz_file:
    r = reader(gz_file, dialect=excel_tab)
    # Extract the column headers
    header = next(r)
    # Materialise the remaining rows so they can be sliced below
    rest = list(r)
# This will be a table of gene annotations
# (NOTE: rebinding it here leaves only the rows processed below)
annotations = []
# This will be a table of numerical values
data2 = []
# Parse annotations and values for the first five rows only
for row in rest[:5]:
    # First four columns are annotations
    annotations.append(row[:4])
    # Every remaining column is converted from text to float
    cells = [float(i) for i in row[4:]]
    data2.append(cells)
# Open gzipped, tab-delimited text file for reading as
# a sequence of lists.  All rows are pulled into memory inside the
# ``with`` block, so the file can be closed before they are processed.
with gzip.open("GSE86922_Brodsky_GEO_processed.txt.gz") as gz_file:
    r = reader(gz_file, dialect=excel_tab)
    # Extract the column headers
    header = next(r)
    # Materialise the remaining rows so they can be sliced below
    rest = list(r)
# This will be a table of gene annotations
# (NOTE: rebinding it here leaves only the rows processed below)
annotations = []
# This will collect the values of all processed rows in one flat list
data3 = []
# Parse annotations and values for the first five rows only
for row in rest[:5]:
    # First four columns are annotations
    annotations.append(row[:4])
    # NOTE(review): the flattened source does not show whether this loop was
    # nested inside the row loop; it is assumed to run once per row - confirm.
    # Values are appended exactly as read, with no float conversion.
    for i in row[4:]:
        data3.append(i)
# Show the two five-row samples for comparison.
print(data2)
print(data3)
# Size of the full table: number of rows, then number of values in the
# first row.
len(data)
len(data[0])
# IPython magic (not plain Python): select matplotlib's "notebook" backend
# for the figures created below.
%matplotlib notebook
import matplotlib.pyplot as plt
# from IPython.core.display import display
# Annotation columns of the first entry in the annotations table.
annotations[0]
# Values of the first data row, drawn with the default line style.
fig = plt.figure()
plt.plot(data[0])
# The same values drawn as blue circle markers ("bo") instead.
fig = plt.figure()
plt.plot(data[0], "bo")
# Take the first value of every row, i.e. the first numeric column of the
# table, as a flat list.
sample1 = [row_values[0] for row_values in data]
len(sample1)
# Values in their original row order.
fig = plt.figure()
plt.plot(sample1)
# Distribution of the values as a histogram; the return value of
# plt.hist is kept in ``h``.
fig = plt.figure()
h = plt.hist(sample1)
# Values in ascending order.
fig = plt.figure()
plt.plot(sorted(sample1))
# Scatter plot of the first data row (x) against the second (y), blue circles.
fig = plt.figure()
plt.plot(data[0], data[1], "bo")
# The whole table rendered as an image with imshow's default settings.
fig = plt.figure()
plt.imshow(data)
# Same image with the aspect ratio set to "auto".
fig = plt.figure()
plt.imshow(data, aspect="auto")
# Same again with interpolation set to "none", plus a colour scale.
fig = plt.figure()
plt.imshow(data, interpolation="none", aspect="auto")
plt.colorbar()