pyx/reader.py

   1 # -*- encoding: utf-8 -*-
   2 #
   3 #
   4 # Copyright (C) 2007-2011 Jörg Lehmann <joergl@users.sourceforge.net>
   5 # Copyright (C) 2007-2011 André Wobst <wobsta@users.sourceforge.net>
   6 #
   7 # This file is part of PyX (http://pyx.sourceforge.net/).
   8 #
   9 # PyX is free software; you can redistribute it and/or modify
  10 # it under the terms of the GNU General Public License as published by
  11 # the Free Software Foundation; either version 2 of the License, or
  12 # (at your option) any later version.
  13 #
  14 # PyX is distributed in the hope that it will be useful,
  15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 # GNU General Public License for more details.
  18 #
  19 # You should have received a copy of the GNU General Public License
  20 # along with PyX; if not, write to the Free Software
  21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
  22
  23
  24 import io, struct
  25
  26
  27 class reader:
  28
  29     def __init__(self, filename):
  30         self.file = open(filename, "rb")
  31
  32     def close(self):
  33         self.file.close()
  34
  35     def tell(self):
  36         return self.file.tell()
  37
  38     def eof(self):
  39         return self.file.eof()
  40
  41     def read(self, bytes):
  42         return self.file.read(bytes)
  43
  44     def readint(self, bytes=4, signed=0):
  45         first = 1
  46         result = 0
  47         while bytes:
  48             value = ord(self.file.read(1))
  49             if first and signed and value > 127:
  50                 value -= 256
  51             first = 0
  52             result = 256 * result + value
  53             bytes -= 1
  54         return result
  55
  56     def readint32(self):
  57         return struct.unpack(">l", self.file.read(4))[0]
  58
  59     def readuint32(self):
  60         return struct.unpack(">L", self.file.read(4))[0]
  61
  62     def readint24(self):
  63         # XXX: checkme
  64         return struct.unpack(">l", "\0"+self.file.read(3))[0]
  65
  66     def readuint24(self):
  67         # XXX: checkme
  68         return struct.unpack(">L", "\0"+self.file.read(3))[0]
  69
  70     def readint16(self):
  71         return struct.unpack(">h", self.file.read(2))[0]
  72
  73     def readuint16(self):
  74         return struct.unpack(">H", self.file.read(2))[0]
  75
  76     def readchar(self):
  77         return struct.unpack("b", self.file.read(1))[0]
  78
  79     def readuchar(self):
  80         return struct.unpack("B", self.file.read(1))[0]
  81
  82     def readstring(self, bytes):
  83         l = self.readuchar()
  84         assert l <= bytes-1, "inconsistency in file: string too long"
  85         return self.file.read(bytes-1)[:l]
  86
  87
  88
  89 class stringreader(reader):
  90
  91     def __init__(self, s):
  92         self.file = io.StringIO(s)
  93
  94
  95 class PStokenizer:
  96     """cursor to read a string token by token"""
  97
  98     def __init__(self, data, startstring=None, eattokensep=1, tokenseps=" \t\r\n", tokenstarts="()<>[]{}/%"):
  99         """creates a cursor for the string data
 100
 101         startstring is a string at which the cursor should start at. The first
 102         ocurance of startstring is used. When startstring is not in data, an
 103         exception is raised, otherwise the cursor is set to the position right
 104         after the startstring. When eattokenseps is set, startstring must be
 105         followed by a tokensep and this first tokensep is also consumed.
 106         tokenseps is a string containing characters to be used as token
 107         separators. tokenstarts is a string containing characters which
 108         directly (even without intermediate token separator) start a new token.
 109         """
 110         self.data = data
 111         if startstring is not None:
 112             self.pos = self.data.index(startstring) + len(startstring)
 113         else:
 114             self.pos = 0
 115         self.tokenseps = tokenseps
 116         self.tokenstarts = tokenstarts
 117         if eattokensep:
 118             if self.data[self.pos] not in self.tokenstarts:
 119                 if self.data[self.pos] not in self.tokenseps:
 120                     raise ValueError("cursor initialization string is not followed by a token separator")
 121                 self.pos += 1
 122
 123     def gettoken(self):
 124         """get the next token
 125
 126         Leading token separators and comments are silently consumed. The first token
 127         separator after the token is also silently consumed."""
 128         while self.data[self.pos] in self.tokenseps:
 129             self.pos += 1
 130         # ignore comments including subsequent whitespace characters
 131         while self.data[self.pos] == "%":
 132             while self.data[self.pos] not in "\r\n":
 133                 self.pos += 1
 134             while self.data[self.pos] in self.tokenseps:
 135                 self.pos += 1
 136         startpos = self.pos
 137         while self.data[self.pos] not in self.tokenseps:
 138             # any character in self.tokenstarts ends the token
 139             if self.pos>startpos and self.data[self.pos] in self.tokenstarts:
 140                 break
 141             self.pos += 1
 142         result = self.data[startpos:self.pos]
 143         if self.data[self.pos] in self.tokenseps:
 144             self.pos += 1 # consume a single tokensep
 145         return result
 146
 147     def getint(self):
 148         """get the next token as an integer"""
 149         return int(self.gettoken())
 150
 151     def getbytes(self, count):
 152         """get the next count bytes"""
 153         startpos = self.pos
 154         self.pos += count
 155         return self.data[startpos: self.pos]
 156
 157
 158