1 from seqdb
import SequenceDB
, BasicSeqInfoDict
2 from annotation
import AnnotationDB
, TranslationAnnot
, TranslationAnnotSlice
3 import classutil
, sequence
7 class SeqTranslator(sequence
.SequenceBase
):
8 """Translator object for positive or minus strand of a sequence.
9 Slicing returns TranslationAnnotSlice of the appropriate
10 TranslationAnnot representing one of the six possible frames for
12 def __init__(self
, db
, id, reversePath
=None):
14 sequence
.SequenceBase
.__init
__(self
)
15 if reversePath
: # create top-level object for reverse strand
17 self
.start
= -len(self
)
19 self
._reverse
= reversePath
20 if self
.id not in self
.db
.seqDB
:
21 raise KeyError('sequence %s not in db %s' % (self
.id, self
.db
))
23 def __getitem__(self
, k
):
24 """get TranslationAnnotSlice for coordinates given by slice k """
25 start
= k
.start
# deal with [:stop] slices
28 stop
= k
.stop
# deal with [start:] slices
32 annoID
= self
._get
_anno
_id
(start
)
33 a
= self
.db
.annodb
[annoID
] # get TranslationAnnot object
34 s
= a
.sequence
# corresponding nucleotide region
36 return a
[(start
- s
.start
) / 3: (stop
- s
.start
) / 3]
38 def absolute_slice(self
, start
, stop
):
39 """get protein slice in absolute nucleotide coords;
40 perform negation before slicing """
42 return (-self
)[start
:stop
]
44 return self
[start
:stop
]
47 return self
.db
.seqInfoDict
[self
.id].length
50 """get SeqTranslator for the opposite strand """
53 except AttributeError:
54 self
._reverse
= self
.__class
__(self
.db
, self
.id,
58 def _get_anno_id(self
, start
):
59 """get annotation ID for frame starting at start """
60 if self
.orientation
> 0: # positive strand
61 return '%s:%d' % (self
.id, start
% 3)
62 else: # negative strand
63 return '%s:-%d' % (self
.id, (-start
) % 3)
def iter_frames(self):
    """Iterate over the 6 possible frames, yielding TranslationAnnot.

    Looks up '<id>:<frame>' in self.db.annodb for the frames
    0, 1, 2, -0, -1, -2, in that order.
    """
    for strand_prefix in ('', '-'):  # positive frames first, then negative
        for offset in ('0', '1', '2'):
            frame_label = strand_prefix + offset
            yield self.db.annodb['%s:%s' % (self.id, frame_label)]
71 return 'SeqTranslator(' + sequence
.SequenceBase
.__repr
__(self
) + ')'
73 class TranslationDB(SequenceDB
):
74 """Provides an automatic translation interface for a nucleotide sequence
75 database: slicing of top-level sequence objects will return the
76 corresponding TranslationAnnotSlice for that slice, i.e. the
77 translated protein sequence, rather than the nucleotide sequence. """
78 itemClass
= SeqTranslator
79 _seqtype
= sequence
.DNA_SEQTYPE
81 def __init__(self
, seqDB
, **kwargs
):
84 self
.seqInfoDict
= seqDB
.seqInfoDict
85 except AttributeError:
86 self
.seqInfoDict
= BasicSeqInfoDict(seqDB
)
87 self
.annodb
= AnnotationDB(SixFrameInfo(seqDB
), seqDB
,
88 itemClass
=TranslationAnnot
,
89 itemSliceClass
=TranslationAnnotSlice
,
90 sliceAttrDict
=dict(id=0,start
=1,stop
=2),
92 SequenceDB
.__init
__(self
, **kwargs
)
96 class SixFrameInfo(object, UserDict
.DictMixin
):
97 """Dictionary of slice info for all six frames of each seq in seqDB. """
98 def __init__(self
, seqDB
):
101 def __getitem__(self
, k
):
102 "convert ID of form seqID:frame into slice info tuple"
105 raise KeyError('invalid TranslationInfo key: %s' % (k
,))
107 length
= len(self
.seqDB
[seqID
]) # sequence length
109 if k
[i
+1] == '-': # negative frame -0, -1, or -2
110 return (seqID
, -(length
- ((length
+ frame
) % 3)), frame
)
111 else: # positive frame 0, 1 or 2
112 return (seqID
, frame
, length
- ((length
- frame
) % 3))
115 return 6 * len(self
.seqDB
)
118 for seqID
in self
.seqDB
:
119 for frame
in (':0', ':1', ':2', ':-0', ':-1', ':-2'):
125 # these methods should not be implemented for read-only database.
126 clear
= setdefault
= pop
= popitem
= copy
= update
= \
127 classutil
.read_only_error
129 def get_translation_db(seqDB
):
130 """Use cached seqDB.translationDB if already present, or create it """
132 return seqDB
.translationDB
133 except AttributeError: # create a new TranslationAnnot DB
134 tdb
= TranslationDB(seqDB
)
136 seqDB
.translationDB
= tdb
137 except AttributeError:
138 pass # won't let us cache? Just hand back the TranslationDB