Merge branch 'master' of git://repo.or.cz/pyTivo/wgw
[pyTivo.git] / Cheetah / SourceReader.py
blob e91c2001eb8f709033484b9028aff366d18955af
1 #!/usr/bin/env python
2 # $Id: SourceReader.py,v 1.14 2006/01/18 03:16:59 tavis_rudd Exp $
3 """SourceReader class for Cheetah's Parser and CodeGenerator
5 Meta-Data
6 ================================================================================
7 Author: Tavis Rudd <tavis@damnsimple.com>
8 License: This software is released for unlimited distribution under the
9 terms of the MIT license. See the LICENSE file.
10 Version: $Revision: 1.14 $
11 Start Date: 2001/09/19
12 Last Revision Date: $Date: 2006/01/18 03:16:59 $
13 """
14 __author__ = "Tavis Rudd <tavis@damnsimple.com>"
15 __revision__ = "$Revision: 1.14 $"[11:-2]
17 import re
18 import sys
# Optional trailing blanks (space, form feed, tab) followed by one line break.
EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)')
# A line break, or the very end of the string -- so a search started anywhere
# inside the string always finds a match.
EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)')
# Bound ``search`` of a pattern capturing the name in a "coding=<name>" /
# "coding: <name>" declaration.  The pattern is a raw string so that ``\s`` and
# ``\w`` reach the regex engine instead of being treated as (invalid) string
# escapes.
ENCODINGsearch = re.compile(r"coding[=:]\s*([-\w.]+)").search
class Error(Exception):
    """Exception type raised by this module's SourceReader."""
class SourceReader:
    """Cursor over a source string.

    The reader keeps a current position, a break point that acts as the
    logical end of the stream (``len(reader)`` and ``atEnd()`` are defined in
    terms of it), a table of named bookmarks, and a pre-computed index of
    line starts/ends used to translate positions into rows and columns.
    """

    # String types recognised by matches(); ``unicode`` is only defined on
    # Python 2, so fall back to plain ``str`` where the name is missing.
    try:
        _stringTypes = (str, unicode)
    except NameError:
        _stringTypes = (str,)

    def __init__(self, src, filename=None, breakPoint=None, encoding=None):
        """Wrap *src* and index its lines.

        src        -- the source string to read from
        filename   -- stored and returned by filename(); not otherwise used
        breakPoint -- logical end of the stream; defaults to len(src).
                      Raises Error if it is negative or beyond len(src).
        encoding   -- accepted but unused by the active code (the decoding
                      logic below is commented out)
        """
        ## @@TR 2005-01-17: the following comes from a patch Terrel Shumway
        ## contributed to add unicode support to the reading of Cheetah source
        ## files with dynamically compiled templates. All the existing unit
        ## tests pass but, it needs more testing and some test cases of its
        ## own. My instinct is to move this up into the code that passes in the
        ## src string rather than leaving it here.  As implemented here it
        ## forces all src strings to unicode, which IMO is not what we want.
        #  if encoding is None:
        #      # peek at the encoding in the first two lines
        #      m = EOLZre.search(src)
        #      pos = m.end()
        #      if pos<len(src):
        #          m = EOLZre.search(src,pos)
        #          pos = m.end()
        #      m = ENCODINGsearch(src,0,pos)
        #      if m:
        #          encoding = m.group(1)
        #      else:
        #          encoding = sys.getfilesystemencoding()
        #  self._encoding = encoding
        #  if type(src) is not unicode:
        #      src = src.decode(encoding)
        ## end of Terrel's patch

        self._src = src
        self._filename = filename

        self._srcLen = len(src)
        if breakPoint is None:
            self._breakPoint = self._srcLen
        else:
            self.setBreakPoint(breakPoint)
        self._pos = 0
        self._bookmarks = {}
        self._posTobookmarkMap = {}

        ## collect some meta-information
        self._indexLines(len(self))

    def _indexLines(self, end):
        """Record the start/end offsets of every line that begins before *end*."""
        src = self._src
        # Remembered so the index can be rebuilt over the same extent later.
        self._lineIndexEnd = end
        self._EOLs = []
        pos = 0
        while pos < end:
            EOLmatch = EOLZre.search(src, pos)
            self._EOLs.append(EOLmatch.start())
            pos = EOLmatch.end()
        self._BOLs = [self.findBOL(eol) for eol in self._EOLs]

    def src(self):
        """Return the underlying source string."""
        return self._src

    def filename(self):
        """Return the filename given to the constructor (may be None)."""
        return self._filename

    def __len__(self):
        """The length of the stream is its break point, not len(src)."""
        return self._breakPoint

    def __getitem__(self, i):
        """Return the character at *i*; raises Error if checkPos(i) fails."""
        self.checkPos(i)
        return self._src[i]

    def __getslice__(self, i, j):
        """Return src[i:j] with negative bounds clamped to 0."""
        i = max(i, 0)
        j = max(j, 0)
        return self._src[i:j]

    def splitlines(self):
        """Return the source split into lines; the list is computed once and cached."""
        if not hasattr(self, '_srcLines'):
            self._srcLines = self._src.splitlines()
        return self._srcLines

    def lineNum(self, pos=None):
        """Return the 0-based index of the line containing *pos*.

        *pos* defaults to the current position.  Returns None when *pos* does
        not fall between the recorded start and end of any indexed line.
        """
        if pos is None:
            pos = self._pos

        for i, (BOL, EOL) in enumerate(zip(self._BOLs, self._EOLs)):
            if BOL <= pos <= EOL:
                return i
        return None

    def getRowCol(self, pos=None):
        """Return the 1-based (row, column) of *pos* (default: current position)."""
        if pos is None:
            pos = self._pos
        lineNum = self.lineNum(pos)
        return lineNum + 1, pos - self._BOLs[lineNum] + 1

    def getRowColLine(self, pos=None):
        """Return (row, column, text of that row) for *pos* (default: current position)."""
        if pos is None:
            pos = self._pos
        row, col = self.getRowCol(pos)
        return row, col, self.splitlines()[row - 1]

    def getLine(self, pos=None):
        """Return the text of the line containing *pos* (default: current position)."""
        if pos is None:
            pos = self._pos
        lineNum = self.lineNum(pos)
        return self.splitlines()[lineNum]

    def pos(self):
        """Return the current position."""
        return self._pos

    def setPos(self, pos):
        """Move to *pos*; raises Error if checkPos(pos) fails."""
        self.checkPos(pos)
        self._pos = pos

    def validPos(self, pos):
        """Return True if 0 <= pos <= break point."""
        return 0 <= pos <= self._breakPoint

    def checkPos(self, pos):
        """Raise Error if *pos* is past the break point or negative."""
        if not pos <= self._breakPoint:
            raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
                        str(self._breakPoint-1) + ")" )
        elif not pos >= 0:
            raise Error("pos (" + str(pos) + ") is invalid: less than 0" )

    def breakPoint(self):
        """Return the current break point."""
        return self._breakPoint

    def setBreakPoint(self, pos):
        """Set the break point; raises Error if *pos* is negative or > len(src)."""
        if pos > self._srcLen:
            raise Error("New breakpoint (" + str(pos) +
                        ") is invalid: beyond the end of stream's source string (" +
                        str(self._srcLen) + ")" )
        elif not pos >= 0:
            raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )

        self._breakPoint = pos

    def setBookmark(self, name):
        """Remember the current position under *name*."""
        self._bookmarks[name] = self._pos
        self._posTobookmarkMap[self._pos] = name

    def hasBookmark(self, name):
        """Return True if a bookmark called *name* has been set."""
        return name in self._bookmarks

    def gotoBookmark(self, name):
        """Move to the position stored under *name*.

        Raises Error if the bookmark does not exist or its position is no
        longer valid (see validPos).
        """
        if not self.hasBookmark(name):
            # No position is known for a missing bookmark, so the message
            # names only the bookmark itself.
            raise Error("Invalid bookmark (" + name + ") is invalid: does not exist" )
        pos = self._bookmarks[name]
        if not self.validPos(pos):
            raise Error("Invalid bookmark (" + name + ', '+
                        str(pos) + ") is invalid: pos is out of range" )
        self._pos = pos

    def atEnd(self):
        """Return True once the position has reached the break point."""
        return self._pos >= self._breakPoint

    def atStart(self):
        """Return True if the position is 0."""
        return self._pos == 0

    def peek(self, offset=0):
        """Return the character *offset* places from the current position
        without moving; raises Error if that position fails checkPos."""
        self.checkPos(self._pos+offset)
        pos = self._pos + offset
        return self._src[pos]

    def getc(self):
        """Return the character at the current position, advancing by one
        when the next position is still valid."""
        pos = self._pos
        if self.validPos(pos+1):
            self._pos += 1
        return self._src[pos]

    def ungetc(self, c=None):
        """Step back one character, optionally replacing it with *c*.

        Raises Error when already at the start of the stream, or when *c* is
        given but is not exactly one character long.  When *c* is given the
        source string is rebuilt (strings are immutable) and the cached line
        data is refreshed so row/column lookups stay in sync.
        """
        if self.atStart():
            raise Error('Already at beginning of stream')
        if c is not None and len(c) != 1:
            raise Error('ungetc() can only push back a single character')

        self._pos -= 1
        if c is not None:
            pos = self._pos
            self._src = self._src[:pos] + c + self._src[pos + 1:]
            if hasattr(self, '_srcLines'):
                del self._srcLines
            self._indexLines(self._lineIndexEnd)

    def advance(self, offset=1):
        """Move forward by *offset*; raises Error if the target fails checkPos."""
        self.checkPos(self._pos + offset)
        self._pos += offset

    def rev(self, offset=1):
        """Move backward by *offset*; raises Error if the target fails checkPos."""
        self.checkPos(self._pos - offset)
        self._pos -= offset

    def read(self, offset):
        """Return the next *offset* characters and move past them."""
        self.checkPos(self._pos + offset)
        start = self._pos
        self._pos += offset
        return self._src[start:self._pos]

    def readTo(self, to, start=None):
        """Move to *to* and return src[start:to] (start defaults to the
        position before the move)."""
        self.checkPos(to)
        if start is None:
            start = self._pos
        self._pos = to
        return self._src[start:to]

    def readToEOL(self, start=None, gobble=True):
        """Read up to the next line break found from the current position.

        With *gobble* true the position ends up after the line break and the
        break is part of the returned text; otherwise it stops in front of it.
        """
        EOLmatch = EOLZre.search(self.src(), self.pos())
        if gobble:
            pos = EOLmatch.end()
        else:
            pos = EOLmatch.start()
        return self.readTo(to=pos, start=start)

    def find(self, it, pos=None):
        """Return src.find(it, pos); *pos* defaults to the current position."""
        if pos is None:
            pos = self._pos
        return self._src.find(it, pos)

    def startswith(self, it, pos=None):
        """Return True if *it* occurs exactly at *pos* (default: current position)."""
        if pos is None:
            pos = self._pos
        # Anchored test at the requested offset -- no scan of the rest of the
        # source, and an explicit pos is honoured.
        return self._src.startswith(it, pos)

    def rfind(self, it, pos=None):
        """Return src.rfind(it, pos); *pos* defaults to the current position."""
        if pos is None:
            pos = self._pos
        return self._src.rfind(it, pos)

    def findBOL(self, pos=None):
        """Return the offset just after the last '\\n' or '\\r' before *pos*
        (0 if there is none); *pos* defaults to the current position."""
        if pos is None:
            pos = self._pos
        src = self.src()
        return max(src.rfind('\n', 0, pos) + 1, src.rfind('\r', 0, pos) + 1, 0)

    def findEOL(self, pos=None, gobble=False):
        """Return the offset of the next line break at or after *pos*.

        With *gobble* true the offset just past the line break is returned
        instead.  *pos* defaults to the current position.
        """
        if pos is None:
            pos = self._pos

        match = EOLZre.search(self.src(), pos)
        if gobble:
            return match.end()
        else:
            return match.start()

    def isLineClearToPos(self, pos=None):
        """Return True if only whitespace (or nothing) lies between the
        beginning of *pos*'s line and *pos* (default: current position)."""
        if pos is None:
            pos = self.pos()
        self.checkPos(pos)
        src = self.src()
        # Locate the line start relative to the requested pos, not the cursor.
        BOL = self.findBOL(pos)
        return BOL == pos or src[BOL:pos].isspace()

    def matches(self, strOrRE):
        """Test the source at the current position against a string or regex.

        For a string the result is a bool from startswith(); anything else is
        assumed to be a compiled regex and the result of its match() is
        returned.
        """
        if isinstance(strOrRE, self._stringTypes):
            return self.startswith(strOrRE, pos=self.pos())
        else: # assume an re object
            return strOrRE.match(self.src(), self.pos())

    def matchWhiteSpace(self, WSchars=' \f\t'):
        """Return True if not at the end and the current character is in *WSchars*."""
        return (not self.atEnd()) and self.peek() in WSchars

    def getWhiteSpace(self, max=None, WSchars=' \f\t'):
        """Consume and return the run of *WSchars* characters at the current
        position, stopping at the break point or after *max* characters when
        *max* is given.  Returns '' if the current character is not in
        *WSchars*."""
        if not self.matchWhiteSpace(WSchars):
            return ''
        start = self.pos()
        breakPoint = self.breakPoint()
        if max is not None:
            breakPoint = min(breakPoint, self.pos()+max)
        while self.pos() < breakPoint:
            self.advance()
            if not self.matchWhiteSpace(WSchars):
                break
        return self.src()[start:self.pos()]

    def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
        """Return True if at the end or the current character is not in *WSchars*."""
        return self.atEnd() or not self.peek() in WSchars

    def getNonWhiteSpace(self, WSchars=' \f\t\n\r'):
        """Consume and return the run of characters not in *WSchars* at the
        current position, stopping at the break point.  Returns '' if the
        current character is in *WSchars*."""
        if not self.matchNonWhiteSpace(WSchars):
            return ''
        start = self.pos()
        while self.pos() < self.breakPoint():
            self.advance()
            if not self.matchNonWhiteSpace(WSchars):
                break
        return self.src()[start:self.pos()]