%matplotlib nbagg
import matplotlib.pyplot as plt
%cd ../data/
from csv import reader, excel_tab
# Parse the tab-delimited expression table into three parallel lists,
# one entry per data row of the file.
orfs = []   # column 0 of every data row
names = []  # column 1 of every data row
data = []   # all remaining columns, converted to float
# The `with` block closes the file handle as soon as parsing is finished;
# newline="" is the mode the csv module documentation asks for.
with open("GSE88801_kallisto_TPMs_thresh10.cdt", newline="") as handle:
    fin = reader(handle, dialect=excel_tab)
    # The first row holds the column labels; the first two of them label the
    # ORF and name columns, so only the rest are kept.
    header = next(fin)[2:]
    for row in fin:
        orfs.append(row[0])
        names.append(row[1])
        data.append([float(i) for i in row[2:]])
import numpy as np
# Turn the parsed rows into a 2-D array and subtract each row's mean from it.
# Bare expressions are kept: in a notebook they display intermediate results.
A = np.array(data)
A.shape, A.dtype
np.mean(A)  # mean over every entry of A (displayed only)
m = np.mean(A, axis=0)  # one mean per column; shown for comparison only
m.shape
m = np.mean(A, axis=1)  # one mean per row; this is the value used below
m.shape
# Reshape to a column vector so it broadcasts across the columns of A.
# -1 lets NumPy infer the row count, so this no longer assumes exactly
# 9939 rows and keeps working when the input table changes size.
m = m.reshape((-1, 1))
m.shape
D = A - m  # row-centered matrix: every row of D has mean zero
D.shape
# Draw one heat map per matrix, each in its own figure with a colour bar:
# first the raw values (A), then the row-centered values (D).
for matrix in (A, D):
    fig = plt.figure()
    plt.imshow(matrix, aspect="auto", interpolation="none")
    plt.colorbar()
from numpy.linalg import norm
# Scale every row of the centered matrix D to unit Euclidean length.
s = norm(D, axis=1)  # one norm per row of D
s.shape
# Column vector so the division broadcasts across the columns of D.
# -1 lets NumPy infer the row count instead of hard-coding 9939.
s = s.reshape((-1, 1))
# NOTE(review): a row of D that is entirely zero has norm 0 and would turn
# into NaNs here -- confirm no such rows occur in this data set.
C = D / s
# Heat map of the centered, unit-length rows.
fig = plt.figure()
plt.imshow(C, interpolation="none", aspect="auto")
plt.colorbar()
header[:10]  # show the first ten column labels
# For the raw (A), centered (D) and normalized (C) matrices in turn, open a
# new figure and plot column 2 against column 0 for the first 100 rows.
for matrix in (A, D, C):
    fig = plt.figure()
    plt.plot(matrix[:100, 0], matrix[:100, 2], "bo")
# Dot product of every row of C with every other row. C was built earlier in
# this file by mean-centering each row and dividing it by its Euclidean norm,
# so entry (i, j) is the correlation between rows i and j.
corr = C.dot(C.T)
corr.shape
# Only the leading 1000 x 1000 corner of the matrix is drawn.
fig = plt.figure()
plt.imshow(corr[:1000, :1000], aspect="auto", interpolation="none")
plt.colorbar()
# Small demo: build a 2-D rotation matrix and a handful of test points.
np.pi
theta = np.pi / 2  # rotation angle in radians (a quarter turn)
cos_theta = np.cos(theta)
sin_theta = np.sin(theta)
# Counter-clockwise rotation matrix [[cos, -sin], [sin, cos]].
rotation = np.array(
    [
        [cos_theta, -sin_theta],
        [sin_theta, cos_theta],
    ]
)
rotation
# Each column is one point: row 0 holds the x values, row 1 the y values.
test_points = np.array(
    [
        [1, 2, 3, 4, 5],
        [-1, -2, 3, 8, 7],
    ],
    dtype="float",
)
test_points
# Plot the original points in blue, then the same points after applying the
# rotation matrix in red, both on one figure.
fig = plt.figure()
plt.plot(*test_points, "bo")
rotated = rotation.dot(test_points)
plt.plot(*rotated, "ro")
import Bio.Cluster as Pycluster
# Trial run: hierarchically cluster only the first 1000 rows of D and time it
# before committing to the full matrix.
Dsub = D[:1000,:]
Dsub.shape
%%time
# dist="u" is the Bio.Cluster code for the uncentered-correlation distance
# (per the Biopython documentation); no linkage method is passed, so the
# library default is used.
tree = Pycluster.treecluster(Dsub, dist="u")
# Same trial on the first 2000 rows -- presumably to see how the run time
# grows with the number of rows.
Dsub = D[:2000,:]
Dsub.shape
%%time
tree = Pycluster.treecluster(Dsub, dist="u")
%%time
# Full run over every row of D; this is the `tree` that stays bound afterwards.
tree = Pycluster.treecluster(D, dist="u")
# Bundle the centered matrix, its row/column labels and the clustering tree
# into a Bio.Cluster record, then write it out under the given job name.
record = Pycluster.Record()
record.uniqid = "UNIQID"

# Row annotations (copies, so the record does not alias the original lists).
record.geneid = list(orfs)
record.genename = list(names)
record.gweight = None
record.gorder = None

# Column annotations.
record.expid = list(header)
record.eweight = None
record.eorder = None

# Values to save: the row-centered matrix.
record.data = D

record.save("GSE88801_kallisto_TPMs_thresh10.centered.um", geneclusters=tree)
# Shell command, not Python: uses rsync over SSH (port 8022) to copy the
# *.centered.um.* files from BMS270/data/ on explorer@localhost into the
# current directory.
# NOTE(review): presumably meant to be typed into a terminal on the receiving
# machine rather than executed as part of this file -- confirm.
rsync -av -e 'ssh -p 8022' explorer@localhost:BMS270/data/"\*.centered.um.\*" ./