changed BlockGenerator to select multiple columns at once, to handle cases where...
[pygr.git] / pygr / translationDB.py
blob6f3179e5e6cfa7d73ea728f70f88a3d42510bfae
1 from seqdb import SequenceDB, BasicSeqInfoDict
2 from annotation import AnnotationDB, TranslationAnnot, TranslationAnnotSlice
3 import classutil, sequence
4 import UserDict
class SeqTranslator(sequence.SequenceBase):
    """Translator object for positive or minus strand of a sequence.

    Slicing returns TranslationAnnotSlice of the appropriate
    TranslationAnnot representing one of the six possible frames for
    this sequence.

    NOTE(review): methods read self.db, but __init__ never assigns it;
    presumably the owning database binds it onto the class -- confirm.
    """

    def __init__(self, db, id, reversePath=None):
        """Create the translator for sequence id.

        db is accepted but not stored by this constructor.
        reversePath, when given, is the forward-strand translator; the
        new object then represents the reverse strand (orientation -1,
        coordinates running from -len(self) to 0).

        Raises KeyError if id is not present in self.db.seqDB.
        """
        self.id = id
        sequence.SequenceBase.__init__(self)
        # Compare with None explicitly: a plain truth test on a
        # SeqTranslator would go through __len__ (a seqInfoDict lookup)
        # and would treat a zero-length sequence as "no reverse path".
        if reversePath is not None:  # top-level object for reverse strand
            self.orientation = -1
            self.start = -len(self)
            self.stop = 0
            self._reverse = reversePath
        if self.id not in self.db.seqDB:
            raise KeyError('sequence %s not in db %s' % (self.id, self.db))

    def __getitem__(self, k):
        """Get TranslationAnnotSlice for coordinates given by slice k.

        k must provide .start and .stop (nucleotide coordinates); a
        missing bound defaults to this object's own start / stop.
        """
        start = k.start  # deal with [:stop] slices
        if start is None:
            start = self.start
        stop = k.stop  # deal with [start:] slices
        if stop is None:
            stop = self.stop

        annoID = self._get_anno_id(start)
        a = self.db.annodb[annoID]  # get TranslationAnnot object
        s = a.sequence  # corresponding nucleotide region

        # Convert nucleotide offsets within the frame to codon indices.
        # Use explicit floor division so the result stays an integer
        # regardless of the division mode in effect.
        return a[(start - s.start) // 3:(stop - s.start) // 3]

    def absolute_slice(self, start, stop):
        """Get protein slice in absolute nucleotide coords;
        perform negation before slicing when start is negative."""
        if start < 0:
            return (-self)[start:stop]
        else:
            return self[start:stop]

    def __len__(self):
        """Return the length recorded for this id in db.seqInfoDict."""
        return self.db.seqInfoDict[self.id].length

    def __neg__(self):
        """Get SeqTranslator for the opposite strand (created on first
        use and cached in self._reverse)."""
        try:
            return self._reverse
        except AttributeError:
            self._reverse = self.__class__(self.db, self.id,
                                           reversePath=self)
            return self._reverse

    def _get_anno_id(self, start):
        """Get annotation ID ('<id>:<frame>') for frame starting at start."""
        if self.orientation > 0:  # positive strand
            return '%s:%d' % (self.id, start % 3)
        else:  # negative strand
            return '%s:-%d' % (self.id, (-start) % 3)

    def iter_frames(self):
        """Iterate over the 6 possible frames, yielding TranslationAnnot."""
        for frame in ('0', '1', '2', '-0', '-1', '-2'):
            yield self.db.annodb['%s:%s' % (self.id, frame)]

    def __repr__(self):
        return 'SeqTranslator(' + sequence.SequenceBase.__repr__(self) + ')'
class TranslationDB(SequenceDB):
    """Automatic translation interface for a nucleotide sequence database.

    Slicing a top-level sequence object from this database yields the
    corresponding TranslationAnnotSlice, i.e. the translated protein
    sequence, rather than the nucleotide sequence.
    """
    itemClass = SeqTranslator
    _seqtype = sequence.DNA_SEQTYPE

    def __init__(self, seqDB, **kwargs):
        """Wrap seqDB; any extra keyword arguments are forwarded to
        SequenceDB.__init__."""
        self.seqDB = seqDB

        # Reuse seqDB's own info dictionary when it exposes one,
        # otherwise build a basic one around seqDB.
        try:
            infoDict = seqDB.seqInfoDict
        except AttributeError:
            infoDict = BasicSeqInfoDict(seqDB)
        self.seqInfoDict = infoDict

        # Annotation database over the six-frame slice info; each slice
        # tuple is read as (id, start, stop).
        frameInfo = SixFrameInfo(seqDB)
        tupleLayout = {'id': 0, 'start': 1, 'stop': 2}
        self.annodb = AnnotationDB(frameInfo, seqDB,
                                   itemClass=TranslationAnnot,
                                   itemSliceClass=TranslationAnnotSlice,
                                   sliceAttrDict=tupleLayout,
                                   checkFirstID=False)

        SequenceDB.__init__(self, **kwargs)
class SixFrameInfo(object, UserDict.DictMixin):
    """Dictionary of slice info for all six frames of each seq in seqDB.

    Keys have the form 'seqID:frame' where frame is one of the strings
    in _frames; values are (seqID, start, stop) tuples.
    """

    # The only legal frame suffixes, in iteration order.
    _frames = ('0', '1', '2', '-0', '-1', '-2')

    def __init__(self, seqDB):
        self.seqDB = seqDB

    def __getitem__(self, k):
        """Convert ID of form seqID:frame into slice info tuple.

        Raises KeyError when k has no ':' separator or when the frame
        suffix is not one of the six legal frames, so that the
        DictMixin-provided membership test and get() see an ordinary
        missing key instead of a ValueError from int().
        """
        i = k.rfind(':')
        if i < 0:
            raise KeyError('invalid TranslationInfo key: %s' % (k,))
        seqID = k[:i]
        frameName = k[i + 1:]
        if frameName not in self._frames:
            raise KeyError('invalid TranslationInfo key: %s' % (k,))
        length = len(self.seqDB[seqID])  # sequence length
        frame = int(frameName)  # '-0' parses to 0, hence the sign test
        if frameName[0] == '-':  # negative frame -0, -1, or -2
            return (seqID, -(length - ((length + frame) % 3)), frame)
        else:  # positive frame 0, 1 or 2
            return (seqID, frame, length - ((length - frame) % 3))

    def __len__(self):
        """Six frames for every sequence in seqDB."""
        return 6 * len(self.seqDB)

    def __iter__(self):
        """Yield every 'seqID:frame' key."""
        for seqID in self.seqDB:
            for frameName in self._frames:
                yield seqID + ':' + frameName

    def keys(self):
        return list(self)

    # these methods should not be implemented for read-only database.
    clear = setdefault = pop = popitem = copy = update = \
        classutil.read_only_error
def get_translation_db(seqDB):
    """Return the TranslationDB for seqDB, reusing a cached one if present.

    If seqDB already has a translationDB attribute it is returned as is.
    Otherwise a new TranslationDB is built and, when seqDB allows
    attribute assignment, stored on it for later calls.
    """
    try:
        cached = seqDB.translationDB
    except AttributeError:
        pass  # nothing cached yet; build one below
    else:
        return cached

    translator = TranslationDB(seqDB)
    try:
        seqDB.translationDB = translator
    except AttributeError:
        pass  # seqDB refuses new attributes; just hand back the new DB
    return translator