7 dna_letters
='AaTtUuGgCcNn'
16 elif l
=='T' or l
=='t':
18 ratio
=ndna
/float(len(s
))
25 return PROTEIN_SEQTYPE
31 return str(seq_id_counter
-1)
34 def write_fasta(ofile
,s
,chunk
=60,id=None,reformatter
=None):
35 "Trivial FASTA output"
39 except AttributeError:
42 ofile
.write('>' + id + '\n')
44 if reformatter
is not None: # APPLY THE DESIRED REFORMATTING
45 seq
= reformatter(seq
)
49 ofile
.write(seq
[pos
:pos
+chunk
] + '\n')
53 return id # IN CASE CALLER WANTS TEMP ID WE MAY HAVE ASSIGNED
55 def read_fasta(ifile
):
56 "iterate over id,title,seq from stream ifile"
61 if id is not None and len(seq
) > 0:
64 id = line
[1:].split()[0]
65 title
= line
[len(id)+2:]
67 elif id is not None: # READ SEQUENCE
68 for word
in line
.split(): # GET RID OF WHITESPACE
70 if id is not None and len(seq
) > 0:
73 raise IOError('no readable sequence in FASTA file!')
75 def read_fasta_one_line(ifile
): # @CTB deprecated; remove
76 "read a single sequence line, return id,title,seq"
80 line
= ifile
.readline(1024) # READ AT MOST 1KB
84 id = line
[1:].split()[0]
85 title
= line
[len(id)+2:]
86 elif id is not None: # READ SEQUENCE
87 for word
in line
.split(): # GET RID OF WHITESPACE
91 raise IOError('no readable sequence in FASTA file!')
93 def read_fasta_lengths(ifile
):
94 "Generate sequence ID,length from stream ifile"
100 if id is not None and seqLength
> 0:
103 id = line
[1:].split()[0]
105 elif id is not None: # READ SEQUENCE
106 for word
in line
.split(): # GET RID OF WHITESPACE
107 seqLength
+= len(word
)
108 if id is not None and seqLength
> 0:
111 raise IOError('no readable sequence in FASTA file!')
113 class AATranslation(object):
114 'customizable translation class'
115 geneticCode
= dict(TTY
='F', TTR
='L', TCN
='S', TAY
='Y', TGY
='C', TGG
='W',
116 CTN
='L', CCN
='P', CAY
='H', CAR
='Q', CGN
='R',
117 ATY
='I', ATA
='I', ATG
='M', ACN
='T', AAY
='N', AAR
='K',
119 GTN
='V', GCN
='A', GAY
='D', GAR
='E', GGN
='G',
122 'initialize our translation dictionary by applying N,Y,R codes'
123 geneticCode
= self
.geneticCode
.copy()
124 for codon
,aa
in self
.geneticCode
.items():
126 geneticCode
[codon
[:2]+'A'] = aa
127 geneticCode
[codon
[:2]+'T'] = aa
128 geneticCode
[codon
[:2]+'G'] = aa
129 geneticCode
[codon
[:2]+'C'] = aa
130 elif codon
[2] == 'Y':
131 geneticCode
[codon
[:2]+'T'] = aa
132 geneticCode
[codon
[:2]+'C'] = aa
133 elif codon
[2] == 'R':
134 geneticCode
[codon
[:2]+'A'] = aa
135 geneticCode
[codon
[:2]+'G'] = aa
136 self
.geneticCode
= geneticCode
137 def __call__(self
, s
):
138 'translate nucleotide string s to amino acid string'
140 s
= s
.replace('U', 'T')
142 for i
in range(0, len(s
), 3):
144 l
.append(self
.geneticCode
[s
[i
:i
+3]])
146 l
.append('X') # uninterpretable
149 translate_orf
= AATranslation() # default translation function