pyx/reader.py

   1 import cStringIO, struct
   2
   3
   4 class reader:
   5
   6     def __init__(self, filename):
   7         self.file = open(filename, "rb")
   8
   9     def close(self):
  10         self.file.close()
  11
  12     def tell(self):
  13         return self.file.tell()
  14
  15     def eof(self):
  16         return self.file.eof()
  17
  18     def read(self, bytes):
  19         return self.file.read(bytes)
  20
  21     def readint(self, bytes=4, signed=0):
  22         first = 1
  23         result = 0
  24         while bytes:
  25             value = ord(self.file.read(1))
  26             if first and signed and value > 127:
  27                 value -= 256
  28             first = 0
  29             result = 256 * result + value
  30             bytes -= 1
  31         return result
  32
  33     def readint32(self):
  34         return struct.unpack(">l", self.file.read(4))[0]
  35
  36     def readuint32(self):
  37         return struct.unpack(">L", self.file.read(4))[0]
  38
  39     def readint24(self):
  40         # XXX: checkme
  41         return struct.unpack(">l", "\0"+self.file.read(3))[0]
  42
  43     def readuint24(self):
  44         # XXX: checkme
  45         return struct.unpack(">L", "\0"+self.file.read(3))[0]
  46
  47     def readint16(self):
  48         return struct.unpack(">h", self.file.read(2))[0]
  49
  50     def readuint16(self):
  51         return struct.unpack(">H", self.file.read(2))[0]
  52
  53     def readchar(self):
  54         return struct.unpack("b", self.file.read(1))[0]
  55
  56     def readuchar(self):
  57         return struct.unpack("B", self.file.read(1))[0]
  58
  59     def readstring(self, bytes):
  60         l = self.readuchar()
  61         assert l <= bytes-1, "inconsistency in file: string too long"
  62         return self.file.read(bytes-1)[:l]
  63
  64
  65
  66 class stringreader(reader):
  67
  68     def __init__(self, s):
  69         self.file = cStringIO.StringIO(s)
  70
  71
  72 class PStokenizer:
  73     """cursor to read a string token by token"""
  74
  75     def __init__(self, data, startstring=None, eattokensep=1, tokenseps=" \t\r\n", tokenstarts="()<>[]{}/%"):
  76         """creates a cursor for the string data
  77
  78         startstring is a string at which the cursor should start at. The first
  79         ocurance of startstring is used. When startstring is not in data, an
  80         exception is raised, otherwise the cursor is set to the position right
  81         after the startstring. When eattokenseps is set, startstring must be
  82         followed by a tokensep and this first tokensep is also consumed.
  83         tokenseps is a string containing characters to be used as token
  84         separators. tokenstarts is a string containing characters which
  85         directly (even without intermediate token separator) start a new token.
  86         """
  87         self.data = data
  88         if startstring is not None:
  89             self.pos = self.data.index(startstring) + len(startstring)
  90         else:
  91             self.pos = 0
  92         self.tokenseps = tokenseps
  93         self.tokenstarts = tokenstarts
  94         if eattokensep:
  95             if self.data[self.pos] not in self.tokenstarts:
  96                 if self.data[self.pos] not in self.tokenseps:
  97                     raise ValueError("cursor initialization string is not followed by a token separator")
  98                 self.pos += 1
  99
 100     def gettoken(self):
 101         """get the next token
 102
 103         Leading token separators and comments are silently consumed. The first token
 104         separator after the token is also silently consumed."""
 105         while self.data[self.pos] in self.tokenseps:
 106             self.pos += 1
 107         # ignore comments including subsequent whitespace characters
 108         while self.data[self.pos] == "%":
 109             while self.data[self.pos] not in "\r\n":
 110                 self.pos += 1
 111             while self.data[self.pos] in self.tokenseps:
 112                 self.pos += 1
 113         startpos = self.pos
 114         while self.data[self.pos] not in self.tokenseps:
 115             # any character in self.tokenstarts ends the token
 116             if self.pos>startpos and self.data[self.pos] in self.tokenstarts:
 117                 break
 118             self.pos += 1
 119         result = self.data[startpos:self.pos]
 120         if self.data[self.pos] in self.tokenseps:
 121             self.pos += 1 # consume a single tokensep
 122         return result
 123
 124     def getint(self):
 125         """get the next token as an integer"""
 126         return int(self.gettoken())
 127
 128     def getbytes(self, count):
 129         """get the next count bytes"""
 130         startpos = self.pos
 131         self.pos += count
 132         return self.data[startpos: self.pos]
 133
 134
 135