Cheetah/SourceReader.py

   1 #!/usr/bin/env python
   2 # $Id: SourceReader.py,v 1.14 2006/01/18 03:16:59 tavis_rudd Exp $
   3 """SourceReader class for Cheetah's Parser and CodeGenerator
   4
   5 Meta-Data
   6 ================================================================================
   7 Author: Tavis Rudd <tavis@damnsimple.com>
   8 License: This software is released for unlimited distribution under the
   9          terms of the MIT license.  See the LICENSE file.
  10 Version: $Revision: 1.14 $
  11 Start Date: 2001/09/19
  12 Last Revision Date: $Date: 2006/01/18 03:16:59 $
  13 """
__author__ = "Tavis Rudd <tavis@damnsimple.com>"
# The slice drops the 11-character "$Revision: " prefix and the trailing " $"
# of the revision keyword string, leaving only the number ("1.14").
__revision__ = "$Revision: 1.14 $"[11:-2]
  16
  17 import re
  18 import sys
  19
  20 EOLre = re.compile(r'[ \f\t]*(?:\r\n|\r|\n)')
  21 EOLZre = re.compile(r'(?:\r\n|\r|\n|\Z)')
  22 ENCODINGsearch = re.compile("coding[=:]\s*([-\w.]+)").search
  23
  24 class Error(Exception):
  25     pass
  26
  27 class SourceReader:
  28     def __init__(self, src, filename=None, breakPoint=None, encoding=None):
  29
  30         ## @@TR 2005-01-17: the following comes from a patch Terrel Shumway
  31         ## contributed to add unicode support to the reading of Cheetah source
  32         ## files with dynamically compiled templates. All the existing unit
  33         ## tests pass but, it needs more testing and some test cases of its
  34         ## own. My instinct is to move this up into the code that passes in the
  35         ## src string rather than leaving it here.  As implemented here it
  36         ## forces all src strings to unicode, which IMO is not what we want.
  37         #  if encoding is None:
  38         #      # peek at the encoding in the first two lines
  39         #      m = EOLZre.search(src)
  40         #      pos = m.end()
  41         #      if pos<len(src):
  42         #          m = EOLZre.search(src,pos)
  43         #          pos = m.end()
  44         #      m = ENCODINGsearch(src,0,pos)
  45         #      if m:
  46         #          encoding = m.group(1)
  47         #      else:
  48         #          encoding  = sys.getfilesystemencoding()
  49         #  self._encoding = encoding
  50         #  if type(src) is not unicode:
  51         #      src = src.decode(encoding)
  52         ## end of Terrel's patch
  53
  54         self._src = src
  55         self._filename = filename
  56
  57         self._srcLen = len(src)
  58         if breakPoint == None:
  59             self._breakPoint = self._srcLen
  60         else:
  61             self.setBreakPoint(breakPoint)
  62         self._pos = 0
  63         self._bookmarks = {}
  64         self._posTobookmarkMap = {}
  65
  66         ## collect some meta-information
  67         self._EOLs = []
  68         pos = 0
  69         while pos < len(self):
  70             EOLmatch = EOLZre.search(src, pos)
  71             self._EOLs.append(EOLmatch.start())
  72             pos = EOLmatch.end()
  73
  74         self._BOLs = []
  75         for pos in self._EOLs:
  76             BOLpos = self.findBOL(pos)
  77             self._BOLs.append(BOLpos)
  78
  79     def src(self):
  80         return self._src
  81
  82     def filename(self):
  83         return self._filename
  84
  85     def __len__(self):
  86         return self._breakPoint
  87
  88     def __getitem__(self, i):
  89         self.checkPos(i)
  90         return self._src[i]
  91
  92     def __getslice__(self, i, j):
  93         i = max(i, 0); j = max(j, 0)
  94         return self._src[i:j]
  95
  96     def splitlines(self):
  97         if not hasattr(self, '_srcLines'):
  98             self._srcLines = self._src.splitlines()
  99         return self._srcLines
 100
 101     def lineNum(self, pos=None):
 102         if pos == None:
 103             pos = self._pos
 104
 105         for i in range(len(self._BOLs)):
 106             if pos >= self._BOLs[i] and pos <= self._EOLs[i]:
 107                 return i
 108
 109     def getRowCol(self, pos=None):
 110         if pos == None:
 111             pos = self._pos
 112         lineNum = self.lineNum(pos)
 113         BOL, EOL = self._BOLs[lineNum], self._EOLs[lineNum]
 114         return lineNum+1, pos-BOL+1
 115
 116     def getRowColLine(self, pos=None):
 117         if pos == None:
 118             pos = self._pos
 119         row, col = self.getRowCol(pos)
 120         return row, col, self.splitlines()[row-1]
 121
 122     def getLine(self, pos):
 123         if pos == None:
 124             pos = self._pos
 125         lineNum = self.lineNum(pos)
 126         return self.splitlines()[lineNum]
 127
 128     def pos(self):
 129         return self._pos
 130
 131     def setPos(self, pos):
 132         self.checkPos(pos)
 133         self._pos = pos
 134
 135
 136     def validPos(self, pos):
 137         return pos <= self._breakPoint and pos >=0
 138
 139     def checkPos(self, pos):
 140         if not pos <= self._breakPoint:
 141             raise Error("pos (" + str(pos) + ") is invalid: beyond the stream's end (" +
 142                         str(self._breakPoint-1) + ")" )
 143         elif not pos >=0:
 144             raise Error("pos (" + str(pos) + ") is invalid: less than 0" )
 145
 146     def breakPoint(self):
 147         return self._breakPoint
 148
 149     def setBreakPoint(self, pos):
 150         if pos > self._srcLen:
 151             raise Error("New breakpoint (" + str(pos) +
 152                         ") is invalid: beyond the end of stream's source string (" +
 153                         str(self._srcLen) + ")" )
 154         elif not pos >= 0:
 155             raise Error("New breakpoint (" + str(pos) + ") is invalid: less than 0" )
 156
 157         self._breakPoint = pos
 158
 159     def setBookmark(self, name):
 160         self._bookmarks[name] = self._pos
 161         self._posTobookmarkMap[self._pos] = name
 162
 163     def hasBookmark(self, name):
 164         return self._bookmarks.has_key(name)
 165
 166     def gotoBookmark(self, name):
 167         if not self.hasBookmark(name):
 168             raise Error("Invalid bookmark (" + name + ', '+
 169                         str(pos) + ") is invalid: does not exist" )
 170         pos = self._bookmarks[name]
 171         if not self.validPos(pos):
 172             raise Error("Invalid bookmark (" + name + ', '+
 173                         str(pos) + ") is invalid: pos is out of range" )
 174         self._pos = pos
 175
 176     def atEnd(self):
 177         return self._pos >= self._breakPoint
 178
 179     def atStart(self):
 180         return self._pos == 0
 181
 182     def peek(self, offset=0):
 183         self.checkPos(self._pos+offset)
 184         pos = self._pos + offset
 185         return self._src[pos]
 186
 187     def getc(self):
 188         pos = self._pos
 189         if self.validPos(pos+1):
 190             self._pos += 1
 191         return self._src[pos]
 192
 193     def ungetc(self, c=None):
 194         if not self.atStart():
 195             raise Error('Already at beginning of stream')
 196
 197         self._pos -= 1
 198         if not c==None:
 199             self._src[self._pos] = c
 200
 201     def advance(self, offset=1):
 202         self.checkPos(self._pos + offset)
 203         self._pos += offset
 204
 205     def rev(self, offset=1):
 206         self.checkPos(self._pos - offset)
 207         self._pos -= offset
 208
 209     def read(self, offset):
 210         self.checkPos(self._pos + offset)
 211         start = self._pos
 212         self._pos += offset
 213         return self._src[start:self._pos]
 214
 215     def readTo(self, to, start=None):
 216         self.checkPos(to)
 217         if start == None:
 218             start = self._pos
 219         self._pos = to
 220         return self._src[start:to]
 221
 222
 223     def readToEOL(self, start=None, gobble=True):
 224         EOLmatch = EOLZre.search(self.src(), self.pos())
 225         if gobble:
 226             pos = EOLmatch.end()
 227         else:
 228             pos = EOLmatch.start()
 229         return self.readTo(to=pos, start=start)
 230
 231
 232     def find(self, it, pos=None):
 233         if pos == None:
 234             pos = self._pos
 235         return self._src.find(it, pos )
 236
 237     def startswith(self, it, pos=None):
 238         if self.find(it, pos) == self.pos():
 239             return True
 240         else:
 241             return False
 242
 243     def rfind(self, it, pos):
 244         if pos == None:
 245             pos = self._pos
 246         return self._src.rfind(it, pos)
 247
 248     def findBOL(self, pos=None):
 249         if pos == None:
 250             pos = self._pos
 251         src = self.src()
 252         return max(src.rfind('\n',0,pos)+1, src.rfind('\r',0,pos)+1, 0)
 253
 254     def findEOL(self, pos=None, gobble=False):
 255         if pos == None:
 256             pos = self._pos
 257
 258         match = EOLZre.search(self.src(), pos)
 259         if gobble:
 260             return match.end()
 261         else:
 262             return match.start()
 263
 264     def isLineClearToPos(self, pos=None):
 265         if pos == None:
 266             pos = self.pos()
 267         self.checkPos(pos)
 268         src = self.src()
 269         BOL = self.findBOL()
 270         return BOL == pos or src[BOL:pos].isspace()
 271
 272     def matches(self, strOrRE):
 273         if isinstance(strOrRE, (str, unicode)):
 274             return self.startswith(strOrRE, pos=self.pos())
 275         else: # assume an re object
 276             return strOrRE.match(self.src(), self.pos())
 277
 278     def matchWhiteSpace(self, WSchars=' \f\t'):
 279         return (not self.atEnd()) and  self.peek() in WSchars
 280
 281     def getWhiteSpace(self, max=None, WSchars=' \f\t'):
 282         if not self.matchWhiteSpace(WSchars):
 283             return ''
 284         start = self.pos()
 285         breakPoint = self.breakPoint()
 286         if max is not None:
 287             breakPoint = min(breakPoint, self.pos()+max)
 288         while self.pos() < breakPoint:
 289             self.advance()
 290             if not self.matchWhiteSpace(WSchars):
 291                 break
 292         return self.src()[start:self.pos()]
 293
 294     def matchNonWhiteSpace(self, WSchars=' \f\t\n\r'):
 295         return self.atEnd() or not self.peek() in WSchars
 296
 297     def getNonWhiteSpace(self, WSchars=' \f\t\n\r'):
 298         if not self.matchNonWhiteSpace(WSchars):
 299             return ''
 300         start = self.pos()
 301         while self.pos() < self.breakPoint():
 302             self.advance()
 303             if not self.matchNonWhiteSpace(WSchars):
 304                 break
 305         return self.src()[start:self.pos()]