2 # $Id: SourceReader.py,v 1.15 2007/04/03 01:57:42 tavis_rudd Exp $
3 """SourceReader class for Cheetah's Parser and CodeGenerator
6 ================================================================================
7 Author: Tavis Rudd <tavis@damnsimple.com>
8 License: This software is released for unlimited distribution under the
9 terms of the MIT license. See the LICENSE file.
10 Version: $Revision: 1.15 $
11 Start Date: 2001/09/19
12 Last Revision Date: $Date: 2007/04/03 01:57:42 $
14 __author__
= "Tavis Rudd <tavis@damnsimple.com>"
15 __revision__
= "$Revision: 1.15 $"[11:-2]
20 EOLre
= re
.compile(r
'[ \f\t]*(?:\r\n|\r|\n)')
21 EOLZre
= re
.compile(r
'(?:\r\n|\r|\n|\Z)')
22 ENCODINGsearch
= re
.compile("coding[=:]\s*([-\w.]+)").search
class Error(Exception):
    """Exception raised by the reader for invalid positions, breakpoints,
    bookmarks and stream-boundary violations."""
28 def __init__(self
, src
, filename
=None, breakPoint
=None, encoding
=None):
30 ## @@TR 2005-01-17: the following comes from a patch Terrel Shumway
31 ## contributed to add unicode support to the reading of Cheetah source
32 ## files with dynamically compiled templates. All the existing unit
33 ## tests pass, but it needs more testing and some test cases of its
34 ## own. My instinct is to move this up into the code that passes in the
35 ## src string rather than leaving it here. As implemented here it
36 ## forces all src strings to unicode, which IMO is not what we want.
37 # if encoding is None:
38 # # peek at the encoding in the first two lines
39 # m = EOLZre.search(src)
42 # m = EOLZre.search(src,pos)
44 # m = ENCODINGsearch(src,0,pos)
46 # encoding = m.group(1)
48 # encoding = sys.getfilesystemencoding()
49 # self._encoding = encoding
50 # if type(src) is not unicode:
51 # src = src.decode(encoding)
52 ## end of Terrel's patch
55 self
._filename
= filename
57 self
._srcLen
= len(src
)
58 if breakPoint
== None:
59 self
._breakPoint
= self
._srcLen
61 self
.setBreakPoint(breakPoint
)
64 self
._posTobookmarkMap
= {}
66 ## collect some meta-information
69 while pos
< len(self
):
70 EOLmatch
= EOLZre
.search(src
, pos
)
71 self
._EOLs
.append(EOLmatch
.start())
75 for pos
in self
._EOLs
:
76 BOLpos
= self
.findBOL(pos
)
77 self
._BOLs
.append(BOLpos
)
86 return self
._breakPoint
88 def __getitem__(self
, i
):
92 def __getslice__(self
, i
, j
):
93 i
= max(i
, 0); j
= max(j
, 0)
97 if not hasattr(self
, '_srcLines'):
98 self
._srcLines
= self
._src
.splitlines()
101 def lineNum(self
, pos
=None):
105 for i
in range(len(self
._BOLs
)):
106 if pos
>= self
._BOLs
[i
] and pos
<= self
._EOLs
[i
]:
def getRowCol(self, pos=None):
    """Return the 1-based ``(row, column)`` pair for ``pos``.

    ``pos`` defaults to the reader's current position (``self._pos``).
    The row comes from ``self.lineNum(pos)`` and the column is measured
    from that line's start offset in ``self._BOLs``.
    """
    if pos is None:
        # Without this the ``None`` default would reach ``pos - BOL`` below.
        pos = self._pos
    lineNum = self.lineNum(pos)
    BOL = self._BOLs[lineNum]
    return lineNum+1, pos-BOL+1
def getRowColLine(self, pos=None):
    """Return ``(row, col, lineText)`` for ``pos``.

    ``pos`` defaults to the reader's current position (``self._pos``).
    ``row`` and ``col`` are whatever ``self.getRowCol(pos)`` reports, and
    ``lineText`` is entry ``row - 1`` of ``self.splitlines()``.
    """
    if pos is None:
        # Resolve the default here so ``getRowCol`` always gets a real offset.
        pos = self._pos
    row, col = self.getRowCol(pos)
    return row, col, self.splitlines()[row-1]
def getLine(self, pos):
    """Return the entry of ``self.splitlines()`` selected by
    ``self.lineNum(pos)``."""
    index = self.lineNum(pos)
    lines = self.splitlines()
    return lines[index]
131 def setPos(self
, pos
):
def validPos(self, pos):
    """Report whether ``pos`` lies between 0 and the breakpoint.

    Both ends are inclusive: the breakpoint itself counts as valid.
    """
    if not pos <= self._breakPoint:
        return False
    return pos >= 0
def checkPos(self, pos):
    """Raise ``Error`` unless ``0 <= pos <= self._breakPoint``.

    Returns ``None`` for an acceptable position.  The breakpoint itself is
    accepted, while the "beyond the end" message reports
    ``self._breakPoint - 1`` as the end of the stream.
    """
    if not pos <= self._breakPoint:
        raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
                    str(self._breakPoint-1) + ")" )
    elif not pos >= 0:
        # Guarded so that in-range positions no longer hit this raise.
        raise Error("pos (" + str(pos) + ") is invalid: less than 0" )
def breakPoint(self):
    """Return the breakpoint offset currently stored on the reader."""
    limit = self._breakPoint
    return limit
def setBreakPoint(self, pos):
    """Store ``pos`` as the new breakpoint after validating it.

    Raises ``Error`` when ``pos`` is greater than the source length
    (``self._srcLen``) or less than 0; otherwise ``self._breakPoint`` is
    updated.
    """
    if pos > self._srcLen:
        raise Error("New breakpoint (" + str(pos) +
                    ") is invalid: beyond the end of stream's source string (" +
                    str(self._srcLen) + ")" )
    elif not pos >= 0:
        # Guarded so that valid values fall through to the assignment below.
        raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )

    self._breakPoint = pos
def setBookmark(self, name):
    """Record the current position under ``name``.

    Updates both lookup tables: name -> position in ``self._bookmarks``
    and position -> name in ``self._posTobookmarkMap``.
    """
    here = self._pos
    self._bookmarks[name] = here
    self._posTobookmarkMap[here] = name
def hasBookmark(self, name):
    """Return True when a bookmark called ``name`` has been recorded in
    ``self._bookmarks``, False otherwise."""
    # ``in`` replaces dict.has_key(), which no longer exists in Python 3.
    return name in self._bookmarks
166 def gotoBookmark(self
, name
):
167 if not self
.hasBookmark(name
):
168 raise Error("Invalid bookmark (" + name
+ ") is invalid: does not exist")
169 pos
= self
._bookmarks
[name
]
170 if not self
.validPos(pos
):
171 raise Error("Invalid bookmark (" + name
+ ', '+
172 str(pos
) + ") is invalid: pos is out of range" )
176 return self
._pos
>= self
._breakPoint
179 return self
._pos
== 0
def peek(self, offset=0):
    """Return the character ``offset`` places from the current position
    without moving the reader.

    The target offset is validated with ``self.checkPos`` first.
    """
    target = self._pos + offset
    self.checkPos(target)
    return self._src[target]
188 if self
.validPos(pos
+1):
190 return self
._src
[pos
]
def ungetc(self, c=None):
    """Step the reader back one position, optionally overwriting the
    character at the new position with ``c``.

    Raises ``Error`` when the reader is already at the start of the
    stream.  When ``c`` is given, ``self._src`` is rebuilt by slicing
    because a ``str`` source does not support item assignment.

    NOTE(review): the cached ``_srcLines`` / ``_EOLs`` / ``_BOLs`` data is
    not refreshed here -- confirm whether callers ever push back a
    different line terminator.
    """
    # The guard used to be inverted: it raised whenever the reader was NOT
    # at the start, contradicting its own message.
    if self.atStart():
        raise Error('Already at beginning of stream')

    self._pos -= 1
    if c is not None:
        self._src = self._src[:self._pos] + c + self._src[self._pos + 1:]
200 def advance(self
, offset
=1):
201 self
.checkPos(self
._pos
+ offset
)
204 def rev(self
, offset
=1):
205 self
.checkPos(self
._pos
- offset
)
def read(self, offset):
    """Return the next ``offset`` characters and advance past them.

    The end position is validated with ``self.checkPos`` before the
    reader moves.
    """
    self.checkPos(self._pos + offset)
    # ``start`` was referenced without ever being bound; capture it before
    # moving so the slice covers exactly the characters consumed.
    start = self._pos
    self._pos = start + offset
    return self._src[start:self._pos]
214 def readTo(self
, to
, start
=None):
219 return self
._src
[start
:to
]
222 def readToEOL(self
, start
=None, gobble
=True):
223 EOLmatch
= EOLZre
.search(self
.src(), self
.pos())
227 pos
= EOLmatch
.start()
228 return self
.readTo(to
=pos
, start
=start
)
231 def find(self
, it
, pos
=None):
234 return self
._src
.find(it
, pos
)
236 def startswith(self
, it
, pos
=None):
237 if self
.find(it
, pos
) == self
.pos():
242 def rfind(self
, it
, pos
):
245 return self
._src
.rfind(it
, pos
)
247 def findBOL(self
, pos
=None):
251 return max(src
.rfind('\n',0,pos
)+1, src
.rfind('\r',0,pos
)+1, 0)
253 def findEOL(self
, pos
=None, gobble
=False):
257 match
= EOLZre
.search(self
.src(), pos
)
263 def isLineClearToPos(self
, pos
=None):
269 return BOL
== pos
or src
[BOL
:pos
].isspace()
def matches(self, strOrRE):
    """Test the source at the current position against ``strOrRE``.

    For a string, returns the result of ``self.startswith`` at the current
    position.  Anything else is assumed to be a compiled regular
    expression and the result of its ``match`` at the current position is
    returned (a match object or ``None``).
    """
    try:
        stringTypes = (str, unicode)
    except NameError:
        # ``unicode`` is not defined on Python 3, where ``str`` covers it.
        stringTypes = (str,)

    if isinstance(strOrRE, stringTypes):
        return self.startswith(strOrRE, pos=self.pos())
    # assume an re object
    return strOrRE.match(self.src(), self.pos())
def matchWhiteSpace(self, WSchars=' \f\t'):
    """Report whether the character at the current position is one of
    ``WSchars``.

    Always False at the end of the stream.
    """
    if self.atEnd():
        return False
    return self.peek() in WSchars
280 def getWhiteSpace(self
, max=None, WSchars
=' \f\t'):
281 if not self
.matchWhiteSpace(WSchars
):
284 breakPoint
= self
.breakPoint()
286 breakPoint
= min(breakPoint
, self
.pos()+max)
287 while self
.pos() < breakPoint
:
289 if not self
.matchWhiteSpace(WSchars
):
291 return self
.src()[start
:self
.pos()]
def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
    """Report whether the character at the current position is NOT one
    of ``WSchars``.

    Always True at the end of the stream.
    """
    if self.atEnd():
        return True
    return self.peek() not in WSchars
296 def getNonWhiteSpace(self
, WSchars
=' \f\t\n\r'):
297 if not self
.matchNonWhiteSpace(WSchars
):
300 while self
.pos() < self
.breakPoint():
302 if not self
.matchNonWhiteSpace(WSchars
):
304 return self
.src()[start
:self
.pos()]