Goal: PCA of gene abundances in bone marrow derived macrophages (BMDMs) or an analogous cell line (J774s) at two time points after infection with either live Mycobacterium tuberculosis (Mtb), dead Mtb, or mock, based on kallisto estimates from the raw reads of Sci Rep 7:42225 (GSE88801)
import cdt_reader
(colnames,genes,annotations,data) = cdt_reader.parse_cdt("GSE88801_kallisto_TPMs_thresh10.cdt",0.)
%matplotlib
len(data),len(data[0])
import numpy as np
D = np.array(data)
D.shape
D = D[:,2:]
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
plt.imshow(D, interpolation = "none", aspect = "auto")
plt.colorbar()
fig
def col_center(d):
mean = np.mean(d, axis = 0)
return d - mean
def col_scale(d):
norm = np.linalg.norm(d, axis = 0)
return d/norm
def correlation_matrix(d):
scaled = col_scale(col_center(d))
return np.dot(scaled.T,scaled)
colnames
P = D[:,(0,2)]
fig = plt.figure()
plt.plot(P[:,0],P[:,1],"k,")
fig
z = (np.linalg.norm(P, axis = 1) != 0.)
z.shape,z[0]
P = P[z,:]
P.shape
C = correlation_matrix(P)
C.shape
fig = plt.figure()
plt.imshow(C, aspect = "auto", vmin = -1, vmax = 1)
plt.colorbar()
fig
a = (P[:,0]+P[:,1])/2. # average (geometric mean)
m = (P[:,1]-P[:,0]) # difference
fig = plt.figure()
plt.plot(a,m,"k,")
fig
AM = np.vstack((a,m)).T
AM.shape
C = correlation_matrix(AM)
fig = plt.figure()
plt.imshow(C, aspect = "auto", vmin = -1, vmax = 1)
plt.colorbar()
fig
(eigenvalues, eigenvectors) = np.linalg.eigh(correlation_matrix(P))
print(eigenvectors)
print(eigenvalues)
np.sqrt(1./2)
u,s,v = np.linalg.svd(col_center(P).T, full_matrices=False)
print(u)
print(s)
v = s**2
v/sum(v)
eigenvalues/sum(eigenvalues)
A = col_center(D.T)
u,s,v = np.linalg.svd(A.T, full_matrices = False)
u.shape
v = s**2
fig = plt.figure()
plt.plot(v/sum(v),"bo")
fig
projected = np.dot(A,u)
projected.shape
fig = plt.figure()
ax = fig.add_subplot(111, projection = "3d")
ax.plot(projected[:,0],projected[:,1],projected[:,2],"bo")
ax.set_xlabel("PC1")
ax.set_ylabel("PC2")
ax.set_zlabel("PC3")
fig
colors = {"BMDM":{"Live":"#ff0000", # red
"Dead":"#00ff00", # green
"uninfected":"#0000ff"}, # blue
"J774":{"Live":"#ffaaaa",
"Dead":"#aaffaa",
"uninfected":"#aaaaff"}}
markers = {"4h":"^","24h":"o"}
colors = {"BMDM":{"Live":"red",
"Dead":"green",
"uninfected":"blue"},
"J774":{"Live":"magenta",
"Dead":"orange",
"uninfected":"cyan"}}
markers = {"4h":"^","24h":"o"}
fig = plt.figure()
ax = fig.add_subplot(111, projection = "3d")
ax.plot(projected[:,0],projected[:,1],projected[:,2],"k,")
for (n,name) in enumerate(colnames):
(strain,infection,rep,time) = name.split("_")
marker = markers[time]
color = colors[strain][infection]
ax.plot([projected[n,0]],[projected[n,1]],[projected[n,2]],
marker = marker, color = color, linestyle = "none")
fig