Goal: Introduction to pairwise sequence alignment

In [1]:
# Download the course's geneticCode module into the working directory so that
# the next cell can import it.
# NOTE(review): the file is fetched over plain HTTP and then imported (i.e.
# executed) below — confirm the source is trusted before re-running.
!curl 'http://histo.ucsf.edu/BMS270/BMS270_2018/code/geneticCode.py' > geneticCode.py
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   915  100   915    0     0  56035      0 --:--:-- --:--:-- --:--:-- 57187

In [2]:
from geneticCode import geneticCode
In [3]:
geneticCode["ATG"]
Out[3]:
'M'
In [4]:
geneticCode["NNN"]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-4-b70f9a2c6a01> in <module>()
----> 1 geneticCode["NNN"]

KeyError: 'NNN'
In [5]:
geneticCode["aaa"]
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-5-d6f92b870578> in <module>()
----> 1 geneticCode["aaa"]

KeyError: 'aaa'
In [6]:
geneticCode["aaa".upper()]
Out[6]:
'K'
In [7]:
"asdf".upper()
Out[7]:
'ASDF'
In [8]:
"this has spaces".replace(" ","")
Out[8]:
'thishasspaces'
In [9]:
# Upper-case the raw string, then keep only the characters A, T, G and C.
"".join(base for base in "ATGCNNFGCTAGN  NttgC".upper() if base in "ATGC")
Out[9]:
'ATGCGCTAGTTGC'
In [10]:
"hi" in {"hi":"there"}
Out[10]:
True
In [11]:
# Dump the whole table, one "codon -> residue letter" line per entry.
for codon, residue in geneticCode.items():
    print(codon, "->", residue)
CTT -> L
TGG -> W
TCG -> S
GTT -> V
GTG -> V
GCG -> A
ACT -> T
TTG -> L
GAC -> D
TAC -> Y
TTC -> F
ACC -> T
GCT -> A
CGG -> R
CCC -> P
CGA -> R
CTA -> L
GGG -> G
TAA -> *
TCC -> S
GTC -> V
CAT -> H
AAA -> K
GTA -> V
CCA -> P
CAG -> Q
AGG -> R
AGC -> S
AGA -> R
CCG -> P
AAC -> N
CGC -> R
GGT -> G
GAG -> E
TAT -> Y
ACG -> T
AAT -> N
ACA -> T
TTA -> L
ATC -> I
CCT -> P
GAT -> D
TGT -> C
TCT -> S
AAG -> K
CGT -> R
CTC -> L
GGC -> G
GCC -> A
TCA -> S
TGC -> C
GCA -> A
ATA -> I
TTT -> F
CAC -> H
TAG -> *
ATG -> M
AGT -> S
CAA -> Q
ATT -> I
TGA -> *
CTG -> L
GGA -> G
GAA -> E

In [12]:
def identity_matrix(alphabet, match=1, mismatch=-1):
    """Build a nested-dict scoring matrix that rewards only exact identity.

    Args:
        alphabet: Iterable of hashable symbols, e.g. the string "ATGC".
            One-shot iterables (generators, iterators) are supported.
        match: Score stored for a symbol paired with itself. Defaults to 1.
        mismatch: Score stored for two different symbols. Defaults to -1.

    Returns:
        A dict of dicts ``d`` where ``d[a][b]`` is ``match`` when ``a == b``
        and ``mismatch`` otherwise, for every pair of symbols in ``alphabet``.
        An empty alphabet yields an empty dict.
    """
    # Materialize once: the alphabet is traversed for rows and again for the
    # columns of every row, which would exhaust a one-shot iterator.
    symbols = list(alphabet)
    return {
        row: {col: (match if row == col else mismatch) for col in symbols}
        for row in symbols
    }
In [13]:
identity_matrix("ATGC")
Out[13]:
{'A': {'A': 1, 'C': -1, 'G': -1, 'T': -1},
 'C': {'A': -1, 'C': 1, 'G': -1, 'T': -1},
 'G': {'A': -1, 'C': -1, 'G': 1, 'T': -1},
 'T': {'A': -1, 'C': -1, 'G': -1, 'T': 1}}
In [14]:
score = identity_matrix("ATGC")
In [15]:
# Two equal-length example sequences that differ at a single position (index 2).
seq1, seq2 = "TGTGGT", "TGAGGT"
In [16]:
score["A"]["T"]
Out[16]:
-1
In [ ]:
 
In [ ]: