2 # $Id: SourceReader.py,v 1.14 2006/01/18 03:16:59 tavis_rudd Exp $
3 """SourceReader class for Cheetah's Parser and CodeGenerator
6 ================================================================================
7 Author: Tavis Rudd <tavis@damnsimple.com>
8 License: This software is released for unlimited distribution under the
9 terms of the MIT license. See the LICENSE file.
10 Version: $Revision: 1.14 $
11 Start Date: 2001/09/19
12 Last Revision Date: $Date: 2006/01/18 03:16:59 $
14 __author__
= "Tavis Rudd <tavis@damnsimple.com>"
15 __revision__
= "$Revision: 1.14 $"[11:-2]
# Optional run of horizontal whitespace (space, form feed, tab) followed by
# exactly one line terminator: \r\n, \r or \n.
EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)')

# A single line terminator, or the very end of the string (\Z), so a search
# from any position always succeeds.
EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)')

# Bound search() for a "coding=<name>" / "coding: <name>" declaration;
# group(1) of the match is the encoding name.  The pattern is a raw string so
# that \s and \w reach the regex engine as written instead of depending on
# Python passing unrecognised string escapes through unchanged.
ENCODINGsearch = re.compile(r"coding[=:]\s*([-\w.]+)").search
24 class Error(Exception):
28 def __init__(self
, src
, filename
=None, breakPoint
=None, encoding
=None):
30 ## @@TR 2005-01-17: the following comes from a patch Terrel Shumway
31 ## contributed to add unicode support to the reading of Cheetah source
32 ## files with dynamically compiled templates. All the existing unit
33 ## tests pass but, it needs more testing and some test cases of its
34 ## own. My instinct is to move this up into the code that passes in the
35 ## src string rather than leaving it here. As implemented here it
36 ## forces all src strings to unicode, which IMO is not what we want.
37 # if encoding is None:
38 # # peek at the encoding in the first two lines
39 # m = EOLZre.search(src)
42 # m = EOLZre.search(src,pos)
44 # m = ENCODINGsearch(src,0,pos)
46 # encoding = m.group(1)
48 # encoding = sys.getfilesystemencoding()
49 # self._encoding = encoding
50 # if type(src) is not unicode:
51 # src = src.decode(encoding)
52 ## end of Terrel's patch
55 self
._filename
= filename
57 self
._srcLen
= len(src
)
58 if breakPoint
== None:
59 self
._breakPoint
= self
._srcLen
61 self
.setBreakPoint(breakPoint
)
64 self
._posTobookmarkMap
= {}
66 ## collect some meta-information
69 while pos
< len(self
):
70 EOLmatch
= EOLZre
.search(src
, pos
)
71 self
._EOLs
.append(EOLmatch
.start())
75 for pos
in self
._EOLs
:
76 BOLpos
= self
.findBOL(pos
)
77 self
._BOLs
.append(BOLpos
)
86 return self
._breakPoint
88 def __getitem__(self
, i
):
92 def __getslice__(self
, i
, j
):
93 i
= max(i
, 0); j
= max(j
, 0)
97 if not hasattr(self
, '_srcLines'):
98 self
._srcLines
= self
._src
.splitlines()
101 def lineNum(self
, pos
=None):
105 for i
in range(len(self
._BOLs
)):
106 if pos
>= self
._BOLs
[i
] and pos
<= self
._EOLs
[i
]:
109 def getRowCol(self
, pos
=None):
112 lineNum
= self
.lineNum(pos
)
113 BOL
, EOL
= self
._BOLs
[lineNum
], self
._EOLs
[lineNum
]
114 return lineNum
+1, pos
-BOL
+1
116 def getRowColLine(self
, pos
=None):
119 row
, col
= self
.getRowCol(pos
)
120 return row
, col
, self
.splitlines()[row
-1]
122 def getLine(self
, pos
):
125 lineNum
= self
.lineNum(pos
)
126 return self
.splitlines()[lineNum
]
131 def setPos(self
, pos
):
def validPos(self, pos):
    """Return True when `pos` lies between 0 and the breakpoint, inclusive."""
    return 0 <= pos <= self._breakPoint
def checkPos(self, pos):
    """Raise Error unless `pos` is a usable position.

    A position is accepted when it is no greater than the breakpoint and
    not negative; otherwise an Error describing the violated bound is
    raised.  Returns None on success.
    """
    limit = self._breakPoint
    if pos > limit:
        raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
                    str(limit - 1) + ")" )
    if pos < 0:
        raise Error("pos (" + str(pos) + ") is invalid: less than 0" )
def breakPoint(self):
    """Return the current breakpoint (the value stored in `_breakPoint`)."""
    current = self._breakPoint
    return current
def setBreakPoint(self, pos):
    """Store `pos` as the new breakpoint after validating it.

    Raises Error when `pos` is greater than the source length
    (`_srcLen`) or is negative.
    """
    sourceLength = self._srcLen
    if pos > sourceLength:
        raise Error("New breakpoint (" + str(pos) +
                    ") is invalid: beyond the end of stream's source string (" +
                    str(sourceLength) + ")" )
    if pos < 0:
        raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )

    self._breakPoint = pos
def setBookmark(self, name):
    """Record the current read position under `name`.

    Both lookup tables are updated: name -> position in `_bookmarks` and
    position -> name in `_posTobookmarkMap`.
    """
    here = self._pos
    self._bookmarks[name] = here
    self._posTobookmarkMap[here] = name
def hasBookmark(self, name):
    """Return True if a bookmark called `name` has been set, else False."""
    # dict.has_key() no longer exists on Python 3; the `in` operator gives
    # the same answer and works on every Python version.
    return name in self._bookmarks
def gotoBookmark(self, name):
    """Move the read position to the bookmark called `name`.

    Raises Error when no bookmark of that name exists, or when the
    position stored for it is rejected by validPos().
    """
    if not self.hasBookmark(name):
        # There is no position to report on this path.  Interpolating `pos`
        # here, before it is assigned, raised UnboundLocalError and hid the
        # intended Error from the caller.
        raise Error("Invalid bookmark (" + str(name) +
                    ") is invalid: does not exist" )
    pos = self._bookmarks[name]
    if not self.validPos(pos):
        raise Error("Invalid bookmark (" + str(name) + ', '+
                    str(pos) + ") is invalid: pos is out of range" )
    self._pos = pos
177 return self
._pos
>= self
._breakPoint
180 return self
._pos
== 0
def peek(self, offset=0):
    """Return the character `offset` places from the current position.

    The position itself is not changed.  The target index is passed
    through checkPos() before the source is indexed.
    """
    target = self._pos + offset
    self.checkPos(target)
    return self._src[target]
189 if self
.validPos(pos
+1):
191 return self
._src
[pos
]
def ungetc(self, c=None):
    """Step the read position back by one character.

    When `c` is given it replaces the character at the new position.
    Raises Error if the reader is already at the start of the stream.
    """
    # The guard used to be inverted: it raised whenever the reader was NOT
    # at the start, which is the only situation where stepping back works.
    if self.atStart():
        raise Error('Already at beginning of stream')

    self._pos -= 1
    if c is not None:
        # Strings are immutable, so item assignment on the source fails;
        # build a new source string with the character swapped in.
        # NOTE(review): any per-line tables cached from the old source are
        # not refreshed here -- confirm callers never push back a different
        # line terminator.
        at = self._pos
        self._src = self._src[:at] + c + self._src[at + 1:]
201 def advance(self
, offset
=1):
202 self
.checkPos(self
._pos
+ offset
)
205 def rev(self
, offset
=1):
206 self
.checkPos(self
._pos
- offset
)
209 def read(self
, offset
):
210 self
.checkPos(self
._pos
+ offset
)
213 return self
._src
[start
:self
._pos
]
215 def readTo(self
, to
, start
=None):
220 return self
._src
[start
:to
]
223 def readToEOL(self
, start
=None, gobble
=True):
224 EOLmatch
= EOLZre
.search(self
.src(), self
.pos())
228 pos
= EOLmatch
.start()
229 return self
.readTo(to
=pos
, start
=start
)
232 def find(self
, it
, pos
=None):
235 return self
._src
.find(it
, pos
)
def startswith(self, it, pos=None):
    """Return True if the source text has `it` starting exactly at `pos`.

    `pos` defaults to the current read position.
    """
    if pos is None:
        pos = self._pos
    # Test the prefix at the requested position.  Comparing a find() result
    # with the *current* position gave the wrong answer whenever `pos`
    # differed from it, and scanned the rest of the source for no reason.
    return self._src.startswith(it, pos)
243 def rfind(self
, it
, pos
):
246 return self
._src
.rfind(it
, pos
)
248 def findBOL(self
, pos
=None):
252 return max(src
.rfind('\n',0,pos
)+1, src
.rfind('\r',0,pos
)+1, 0)
254 def findEOL(self
, pos
=None, gobble
=False):
258 match
= EOLZre
.search(self
.src(), pos
)
264 def isLineClearToPos(self
, pos
=None):
270 return BOL
== pos
or src
[BOL
:pos
].isspace()
def matches(self, strOrRE):
    """Test `strOrRE` against the source at the current position.

    A plain string is checked with startswith().  Anything else is assumed
    to be a compiled regular expression, and the result of its match()
    call (a match object or None) is returned.
    """
    try:
        stringTypes = (str, unicode)
    except NameError:
        # Python 3 has no separate `unicode` builtin; str covers all text.
        stringTypes = (str,)

    here = self.pos()
    if isinstance(strOrRE, stringTypes):
        return self.startswith(strOrRE, pos=here)
    # assume an re object
    return strOrRE.match(self.src(), here)
def matchWhiteSpace(self, WSchars=' \f\t'):
    """Return True if the next character is one of `WSchars`.

    Always False once the reader is at the end, in which case peek() is
    not called.
    """
    if self.atEnd():
        return False
    return self.peek() in WSchars
281 def getWhiteSpace(self
, max=None, WSchars
=' \f\t'):
282 if not self
.matchWhiteSpace(WSchars
):
285 breakPoint
= self
.breakPoint()
287 breakPoint
= min(breakPoint
, self
.pos()+max)
288 while self
.pos() < breakPoint
:
290 if not self
.matchWhiteSpace(WSchars
):
292 return self
.src()[start
:self
.pos()]
def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
    """Return a true value if the reader is at the end or the next
    character is not one of `WSchars`.

    At the end of the stream the result of atEnd() is returned as-is and
    peek() is not called.
    """
    finished = self.atEnd()
    if finished:
        return finished
    return self.peek() not in WSchars
297 def getNonWhiteSpace(self
, WSchars
=' \f\t\n\r'):
298 if not self
.matchNonWhiteSpace(WSchars
):
301 while self
.pos() < self
.breakPoint():
303 if not self
.matchNonWhiteSpace(WSchars
):
305 return self
.src()[start
:self
.pos()]