Load data

In [1]:
# The cells below call these names unqualified. Import them explicitly so the
# notebook also runs on a kernel that was not started in pylab mode.
from numpy import arange, array, dot
from numpy.linalg import svd
from matplotlib.pyplot import figure, get_cmap, plot, subplots
from IPython.display import display

import cdt

# Load the expression profile from the CDT file in the working directory.
# Later cells read its `num` and `expCond` attributes.
eisen = cdt.ExpressionProfile("supp2data.cdt")
In [2]:
# Convert the numeric table held in eisen.num into an array for the linear algebra below.
X = array(eisen.num)
# Show the dimensions. The column count matches the number of labels in
# eisen.expCond; the rows are presumably genes — TODO confirm against cdt.
X.shape
Out[2]:
(2467, 79)

Gene oriented SVD without scaling

In [3]:
# Factor X and report the elapsed time (%time is an IPython line magic).
# Later cells use the columns of u and the values in s.
# NOTE(review): if svd is numpy.linalg.svd, full_matrices=False gives the reduced
# factorization and the third return value is the transposed factor, not V itself —
# confirm before using v.
%time (u,s,v) = svd(X, full_matrices = False)
CPU times: user 44 ms, sys: 0 ns, total: 44 ms
Wall time: 44 ms

Plot singular values

In [4]:
# Singular values in the order returned by svd, drawn as a connected line.
(fig, ax) = subplots(1, 1)
ax.plot(s)
# Label the axes so the figure can be read without the surrounding cells.
ax.set_xlabel("component index")
ax.set_ylabel("singular value")
ax.set_title("Singular values of X")
display(fig)
In [5]:
# Same singular values as individual blue dots, which makes single values easier to pick out.
(fig, ax) = subplots(1, 1)
ax.plot(s, "bo")
# Label the axes so the figure can be read without the surrounding cells.
ax.set_xlabel("component index")
ax.set_ylabel("singular value")
ax.set_title("Singular values of X")
display(fig)

Project arrays into gene space

In [6]:
# One value per column of X: the dot product of that column with the
# first (p1) and second (p2) column of u.
p1 = u[:, 0].dot(X)
p2 = u[:, 1].dot(X)
In [7]:
# Each dot is one column of X, placed by its projections onto the first two columns of u.
(fig, ax) = subplots(1, 1)
ax.plot(p1, p2, "bo")
# Name the axes after the quantities plotted so the figure stands on its own.
ax.set_xlabel("p1 = dot(u[:,0], X)")
ax.set_ylabel("p2 = dot(u[:,1], X)")
display(fig)
             

Let's color by data set

In [8]:
# List the column labels. The parenthesised form prints the same text on
# Python 2 and also parses on Python 3, where the print statement does not.
print(eisen.expCond)
['alpha 0', 'alpha 7', 'alpha 14', 'alpha 21', 'alpha 28', 'alpha 35', 'alpha 42', 'alpha 49', 'alpha 56', 'alpha 63', 'alpha 70', 'alpha 77', 'alpha 84', 'alpha 91', 'alpha 98', 'alpha 105', 'alpha 112', 'alpha 119', 'Elu 0', 'Elu 30', 'Elu 60', 'Elu 90', 'Elu 120', 'Elu 150', 'Elu 180', 'Elu 210', 'Elu 240', 'Elu 270', 'Elu 300', 'Elu 330', 'Elu 360', 'Elu 390', 'cdc15 10', 'cdc15 30', 'cdc15 50', 'cdc15 70', 'cdc15 90', 'cdc15 110', 'cdc15 130', 'cdc15 150', 'cdc15 170', 'cdc15 190', 'cdc15 210', 'cdc15 230', 'cdc15 250', 'cdc15 270', 'cdc15 290', 'spo 0', 'spo 2', 'spo 5', 'spo 7', 'spo 9', 'spo 11', 'spo5 2', 'spo5 7', 'spo5 11', 'spo- early', 'spo- mid', 'heat 0', 'heat 10', 'heat 20', 'heat 40', 'heat 80', 'heat 160', 'dtt 15', 'dtt 30', 'dtt 60', 'dtt 120', 'cold 0', 'cold 20', 'cold 40', 'cold 160', 'diau a', 'diau b', 'diau c', 'diau d', 'diau e', 'diau f', 'diau g']
In [9]:
# Distinct series names: the first whitespace-separated token of every column label.
sorted({label.split()[0] for label in eisen.expCond})
Out[9]:
['Elu', 'alpha', 'cdc15', 'cold', 'diau', 'dtt', 'heat', 'spo', 'spo-', 'spo5']
In [10]:
# One colour per column label, chosen by the label's series prefix.
# Prefixes are tried in order; "spo" also matches the "spo-" and "spo5"
# labels, so all three sporulation series share magenta.
prefix_colors = (
    ("Elu", "red"),
    ("alpha", "orange"),
    ("cdc15", "yellow"),
    ("cold", "green"),
    ("diau", "cyan"),
    ("dtt", "blue"),
    ("heat", "purple"),
    ("spo", "magenta"),
)

colors = []
for label in eisen.expCond:
    for (prefix, shade) in prefix_colors:
        if label.startswith(prefix):
            colors.append(shade)
            break
    else:
        # No known prefix matched this label.
        raise ValueError
In [11]:
# Sanity check: there should be exactly one colour per projected point.
len(colors), len(p1)
Out[11]:
(79, 79)
In [12]:
# Draw every point with its own plot call so each one takes the colour
# assigned to its data set in `colors`.
fig = figure()
for (i,j,c) in zip(p1,p2,colors):
    plot([i],[j], color = c, marker = "o")
display(fig)

So, the second singular vector is separating out a lot of the sporulation data -- let's color and label this series

In [13]:
# Positions of the first "spo" label and the first "heat" label in the column list;
# the sporulation-related columns sit between them.
eisen.expCond.index("spo 0"), eisen.expCond.index("heat 0")
Out[13]:
(47, 58)
In [14]:
# Number of columns from "spo 0" up to, but not including, "heat 0"
# (the two indices come from the previous cell's output).
58-47
Out[14]:
11
In [15]:
# Find the sporulation columns by label rather than by hard-coded position:
# they run from "spo 0" up to, but not including, "heat 0".
spo_start = eisen.expCond.index("spo 0")
spo_stop = eisen.expCond.index("heat 0")
spo = slice(spo_start, spo_stop)

(fig,ax) = subplots(1,1)
# All columns in green first, then the sporulation columns drawn on top,
# coloured by their position along the series.
ax.scatter(p1,p2,color ="green", marker= "o")
ax.scatter(p1[spo],p2[spo],c = arange(spo_start,spo_stop), cmap = get_cmap("seismic"))
for (x, y, label) in zip(p1[spo], p2[spo], eisen.expCond[spo]):
    # Put each label in a translucent box, offset from its point.
    ax.annotate(label, xy = (x, y), xytext = (x-5,y+2),
                bbox = {"fc":'yellow',"alpha":.5,"boxstyle":"round,pad=0.5"})
# Name the axes after the quantities plotted so the figure stands on its own.
ax.set_xlabel("p1 = dot(u[:,0], X)")
ax.set_ylabel("p2 = dot(u[:,1], X)")
display(fig)

Based on the key, the "spo5" columns are relative to t=5h rather than t=0 (effectively subtracting the spo5 vector) and the "spo-" columns are from an ndt80 knockout relative to t=2h or t=5h.

Here's the equivalent plot for the arrays projected onto the first three components (try rotating the plot in IPython)

In [16]:
# Projection of every column of X onto the third column of u.
p3 = dot(u[:,2],X)
# NOTE(review): Axes3D is not referenced by name; the import is presumably kept
# so that projection='3d' is registered on older matplotlib — confirm before removing.
from mpl_toolkits.mplot3d import Axes3D
fig = figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot(p1,p2,p3,"bo")
# Axis names matter here: once the plot is rotated the axes are otherwise indistinguishable.
ax.set_xlabel("p1")
ax.set_ylabel("p2")
ax.set_zlabel("p3")
display(fig)

Or, with our previous coloring:

In [17]:
# Same 3D projection, one plot call per point so each takes its data-set colour.
fig = figure()
ax = fig.add_subplot(111, projection='3d')
for (i,j,k,c) in zip(p1,p2,p3,colors):
    ax.plot([i],[j],[k], color = c, marker = "o")
# Axis names matter here: once the plot is rotated the axes are otherwise indistinguishable.
ax.set_xlabel("p1")
ax.set_ylabel("p2")
ax.set_zlabel("p3")
display(fig)

Another view, highlighting the separation of the heat-shock data:

In [18]:
# NOTE(review): this re-displays the figure built in the previous cell. Nothing here
# changes the viewpoint, so the "other view" presumably came from rotating the plot
# by hand; a fresh run shows the same view again. Calling ax.view_init(elev=..., azim=...)
# first would make it reproducible — the angles are not recorded in this notebook.
display(fig)