formats->format
[PyX/mjg.git] / pyx / reader.py
blob0f652b73ce11152df5466d40b8dfdf2310074567
1 import cStringIO, struct
class reader:
    """Sequential reader for binary files with integer decoding helpers.

    All multi-byte integer methods decode big-endian values (see the ">"
    prefix of the struct format strings used below).
    """

    def __init__(self, filename):
        """Open the file named filename for reading in binary mode."""
        self.file = open(filename, "rb")

    def close(self):
        """Close the underlying file object."""
        self.file.close()

    def tell(self):
        """Return the current position of the underlying file object."""
        return self.file.tell()

    def eof(self):
        """Return a true value when no further byte can be read.

        File objects do not provide an eof() method, so the end of the
        file is detected by trying to read a single byte and restoring the
        previous position afterwards. The read position is left unchanged.
        """
        position = self.file.tell()
        exhausted = not self.file.read(1)
        self.file.seek(position)
        return exhausted

    def read(self, bytes):
        """Read and return up to bytes bytes from the file."""
        return self.file.read(bytes)

    def readint(self, bytes=4, signed=0):
        """Read a big-endian integer stored in bytes bytes.

        When signed is set, the first (most significant) byte is taken as
        a signed value, which yields the two's complement interpretation
        of the whole number. A TypeError is raised by ord() when the file
        ends before all bytes could be read.
        """
        result = 0
        for index in range(bytes):
            value = ord(self.file.read(1))
            # only the most significant byte carries the sign
            if index == 0 and signed and value > 127:
                value -= 256
            result = 256 * result + value
        return result

    def readint32(self):
        """Read a signed big-endian 32 bit integer."""
        return struct.unpack(">l", self.file.read(4))[0]

    def readuint32(self):
        """Read an unsigned big-endian 32 bit integer."""
        return struct.unpack(">L", self.file.read(4))[0]

    def readint24(self):
        """Read a signed big-endian 24 bit integer.

        The three bytes are extended to four bytes by a padding byte which
        replicates the sign bit, so that values with the highest bit set
        are returned as negative numbers (consistent with
        readint(3, signed=1)). struct.error is raised on a short read.
        """
        data = self.file.read(3)
        # the b prefix keeps the concatenation valid when read() returns bytes
        if data[:1] >= b"\x80":
            padding = b"\xff"
        else:
            padding = b"\0"
        return struct.unpack(">l", padding + data)[0]

    def readuint24(self):
        """Read an unsigned big-endian 24 bit integer.

        A zero byte is prepended to decode the value as a 32 bit integer.
        """
        return struct.unpack(">L", b"\0" + self.file.read(3))[0]

    def readint16(self):
        """Read a signed big-endian 16 bit integer."""
        return struct.unpack(">h", self.file.read(2))[0]

    def readuint16(self):
        """Read an unsigned big-endian 16 bit integer."""
        return struct.unpack(">H", self.file.read(2))[0]

    def readchar(self):
        """Read a single byte as a signed integer."""
        return struct.unpack("b", self.file.read(1))[0]

    def readuchar(self):
        """Read a single byte as an unsigned integer."""
        return struct.unpack("B", self.file.read(1))[0]

    def readstring(self, bytes):
        """Read a length-prefixed string from a field of bytes bytes.

        The field consists of one length byte followed by bytes-1 payload
        bytes. The whole field is always consumed; only the leading part
        of the payload, as given by the length byte, is returned. An
        AssertionError is raised when the stored length does not fit into
        the field.
        """
        length = self.readuchar()
        # raised explicitly so that the check is not stripped by python -O
        if length > bytes - 1:
            raise AssertionError("inconsistency in file: string too long")
        return self.file.read(bytes - 1)[:length]
class stringreader(reader):
    """reader working on the content of a string instead of a file"""

    def __init__(self, s):
        """wrap the string s into a file-like object to read from"""
        # reader.__init__ is not called here, hence no file gets opened
        stream = cStringIO.StringIO(s)
        self.file = stream
class PStokenizer:
    """cursor to read a string token by token"""

    def __init__(self, data, startstring=None, eattokensep=1, tokenseps=" \t\r\n", tokenstarts="()<>[]{}/%"):
        """creates a cursor for the string data

        startstring is a string at which the cursor should start at. The first
        occurrence of startstring is used. When startstring is not in data, a
        ValueError is raised (by data.index), otherwise the cursor is set to
        the position right after the startstring. Without a startstring the
        cursor starts at the beginning of data.

        When eattokensep is set, the character at the initial cursor position
        must either be a token separator, which is then consumed, or a
        character out of tokenstarts, which is left in place. Any other
        character leads to a ValueError. An IndexError is raised when there
        is no character at the initial cursor position at all.

        tokenseps is a string containing characters to be used as token
        separators. tokenstarts is a string containing characters which
        directly (even without intermediate token separator) start a new token.
        """
        self.data = data
        if startstring is not None:
            self.pos = self.data.index(startstring) + len(startstring)
        else:
            self.pos = 0
        self.tokenseps = tokenseps
        self.tokenstarts = tokenstarts
        if eattokensep:
            if self.data[self.pos] not in self.tokenstarts:
                if self.data[self.pos] not in self.tokenseps:
                    raise ValueError("cursor initialization string is not followed by a token separator")
                self.pos += 1

    def gettoken(self):
        """get the next token

        Leading token separators and comments are silently consumed. The first
        token separator after the token is also silently consumed. A token may
        extend up to the very end of data; it does not need to be followed by
        a token separator. An IndexError is raised when no token is left."""
        data = self.data
        end = len(data)
        while self.pos < end and data[self.pos] in self.tokenseps:
            self.pos += 1
        # ignore comments including subsequent whitespace characters
        while self.pos < end and data[self.pos] == "%":
            while self.pos < end and data[self.pos] not in "\r\n":
                self.pos += 1
            while self.pos < end and data[self.pos] in self.tokenseps:
                self.pos += 1
        if self.pos >= end:
            # keep signalling exhausted data by the exception type callers
            # got from the out-of-range string access
            raise IndexError("no token left in data")
        startpos = self.pos
        while self.pos < end and data[self.pos] not in self.tokenseps:
            # any character in self.tokenstarts ends the token
            if self.pos > startpos and data[self.pos] in self.tokenstarts:
                break
            self.pos += 1
        result = data[startpos:self.pos]
        if self.pos < end and data[self.pos] in self.tokenseps:
            self.pos += 1  # consume a single tokensep
        return result

    def getint(self):
        """get the next token as an integer"""
        return int(self.gettoken())

    def getbytes(self, count):
        """get the next count bytes

        The cursor is advanced by count without any check against the length
        of data, i.e. less than count bytes are returned at the end of data."""
        startpos = self.pos
        self.pos += count
        return self.data[startpos:self.pos]