# Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
#
# By using this file, you agree to the terms and conditions set forth in
# the LICENSE.html file which can be found at the top level of the ViewCVS
# distribution or at http://viewcvs.sourceforge.net/license-1.html.
#
# Greg Stein, PO Box 760, Palo Alto, CA, 94302
# gstein@lyra.org, http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
#
# This software is being maintained as part of the ViewCVS project.
# Information is available at:
# http://viewcvs.sourceforge.net/
#
# -----------------------------------------------------------------------
# note: this will raise an ImportError if it isn't available. the rcsparse
# package will recognize this and switch over to the default parser.
24 from mx
import TextTools
# Characters that may appear in an RCS identifier token: all printable
# ASCII (33-126) plus the Latin-1 upper range (160-255), minus the RCS
# metacharacters removed below.
# Changed: list(map(...)) and ''.join() replace the Python-2-only
# "map(...) + map(...)" and deprecated string.join() — identical results,
# version-neutral.
_idchar_list = list(map(chr, range(33, 127))) + list(map(chr, range(160, 256)))
_idchar_list.remove('$')
_idchar_list.remove(',')
#_idchar_list.remove('.')   leave as part of 'num' symbol
_idchar_list.remove(':')
_idchar_list.remove(';')
_idchar_list.remove('@')
_idchar = ''.join(_idchar_list)
_idchar_set = _tt.set(_idchar)

# Tokens that are complete in a single character.
_onechar_token_set = _tt.set(':;')

# Everything except the '@' string delimiter/escape character.
_not_at_set = _tt.invset('@')
51 _E_COMPLETE
= 100 # ended on a complete token
52 _E_TOKEN
= 110 # ended mid-token
53 _E_STRING_SPAN
= 130 # ended within a string
54 _E_STRING_END
= 140 # ended with string-end ('@') (could be mid-@@)
59 _CONTINUE
= 'CONTINUE'
63 # continuation of a token over a chunk boundary
65 (_T_TOKEN
, _tt
.AllInSet
, _idchar_set
),
# the algorithm is about the same speed for any CHUNK_SIZE chosen.
# grab a good-sized chunk, but not too large to overwhelm memory.
# note: we use a multiple of a standard block size
# NOTE(review): read as self.CHUNK_SIZE in _parse_more, so this is
# presumably a class attribute of the token-stream class whose header is
# not visible in this excerpt — confirm against the full file.
CHUNK_SIZE = 192 * 512  # about 100k

# CHUNK_SIZE = 5  # for debugging, make the function grind...
def __init__(self, file):
    # NOTE(review): original lines 78-81 are elided from this excerpt.
    # _parse_more() reads self.rcsfile and the accessors read
    # self.tokens/self.partial, so those attributes are presumably
    # initialized in the elided lines — confirm against the full file.
    # string_end is bookkeeping used by _handle_partial/_set_end.
    self.string_end = None
def _parse_chunk(self, buf, start=0):
    "Get the next token from the RCS file."
    # NOTE(review): many original lines are elided from this excerpt;
    # each gap is marked "[elided]". The tuples below are entries of an
    # mx.TextTools tag table — the enclosing header (presumably
    # "table = _tt.TagTable((") and its closing "))" are elided.

    # construct a tag table which refers to the buffer we need to parse.
    # [elided]

    # ignore whitespace. with or without whitespace, move to the next rule.
    (None, _tt.AllInSet, _tt.whitespace_set, +1),

    # EOF here means the chunk ended on a complete token.
    (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

    # accumulate token text and exit, or move to the next rule.
    (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _idchar_set, +2),

    # EOF while matching identifier chars: chunk ended mid-token.
    (_E_TOKEN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -3, _SUCCESS),

    # single character tokens exit immediately, or move to the next rule
    (_UNUSED, _tt.IsInSet + _tt.AppendMatch, _onechar_token_set, +2),

    (_E_COMPLETE, _tt.EOF + _tt.AppendTagobj, _tt.Here, -5, _SUCCESS),

    # if this isn't an '@' symbol, then we have a syntax error (go to a
    # negative index to indicate that condition). otherwise, suck it up
    # and move to the next rule.
    (_T_STRING_START, _tt.Is + _tt.AppendTagobj, '@'),

    # '@@' inside a string is an escaped '@'; a lone '@' ends the string.
    (None, _tt.Is, '@', +4, +1),
    (buf, _tt.Is, '@', +1, -1),
    (_T_STRING_END, _tt.Skip + _tt.AppendTagobj, 0, 0, +1),
    # EOF right after an '@' — could be the middle of an '@@' escape.
    (_E_STRING_END, _tt.EOF + _tt.AppendTagobj, _tt.Here, -10, _SUCCESS),

    (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, +1, _SUCCESS),

    # suck up everything that isn't an AT. go to next rule to look for EOF
    (buf, _tt.AllInSet, _not_at_set, 0, +1),

    # go back to look for double AT if we aren't at the end of the string
    (_E_STRING_SPAN, _tt.EOF + _tt.AppendTagobj, _tt.Here, -6, _SUCCESS),

    # [elided: table closing]

    # run the tag table over the buffer starting at 'start'
    success, taglist, idx = _tt.tag(buf, table, start)

    # [elided: presumably an "if not success:" guard before the raise]
    ### need a better way to report this error
    raise common.RCSIllegalCharacter()

    # pop off the last item — it is one of the _E_* sentinels appended
    # by the table above, recording how the chunk ended.
    last_which = taglist.pop()

    # [elided: loop locating string pieces in taglist; 'i'/'j' come from it]
    if taglist[i] == _T_STRING_START:
        # [elided]
        if taglist[j] == _T_STRING_END:
            # complete string: join the pieces between the delimiters
            s = _tt.join(taglist, '', i+1, j)
            # [elided]
        # [elided]
        assert last_which == _E_STRING_SPAN
        # string runs off the end of this chunk: remember it as partial
        s = _tt.join(taglist, '', i+1)
        self.partial = (_T_STRING_SPAN, [ s ])
        # [elided]

    # figure out whether we have a partial last-token
    if last_which == _E_TOKEN:
        self.partial = (_T_TOKEN, [ taglist.pop() ])
    elif last_which == _E_COMPLETE:
        # [elided: nothing is partial in this case]
    elif last_which == _E_STRING_SPAN:
        # [elided]
    # [elided: presumably an else: before the assert]
    assert last_which == _E_STRING_END
    self.partial = (_T_STRING_END, [ taglist.pop() ])

    # queue the new tokens; tokens are consumed with pop() from the END
    # of self.tokens, so previously queued tokens (appended here) are
    # consumed before the newly parsed ones.
    taglist.extend(self.tokens)
    self.tokens = taglist
def _set_end(self, taglist, text, l, r, subtags):
    # CallTag callback used by the string-continuation table built in
    # _handle_partial (invoked when the closing '@' of a string matches).
    # NOTE(review): the body (original line ~174) is elided from this
    # excerpt; _handle_partial later reads self.string_end, so this
    # presumably records the match position there — confirm against the
    # full file.
def _handle_partial(self, buf):
    # Continue a token or @-string left partial by the previous chunk.
    # NOTE(review): several original lines are elided; gaps are marked
    # "[elided]".
    which, chunks = self.partial
    if which == _T_TOKEN:
        # try to continue the identifier token at the start of buf
        success, taglist, idx = _tt.tag(buf, _c_token_table)
        # [elided: presumably an "if not success:" guard]
        # The start of this buffer was not a token. So the end of the
        # prior buffer was a complete token.
        self.tokens.insert(0, string.join(chunks, ''))
        # [elided]
        assert len(taglist) == 1 and taglist[0][0] == _T_TOKEN \
               and taglist[0][1] == 0 and taglist[0][2] == idx
        # [elided: presumably "if idx == len(buf):"]
        # The whole buffer was one huge token, so we may have a
        # partial token again.
        # Note: this modifies the list of chunks in self.partial
        # [elided]
        # consumed the whole buffer
        # [elided]
        # got the rest of the token.
        chunks.append(buf[:idx])
        self.tokens.insert(0, string.join(chunks, ''))
        # no more partial token
        # [elided]

    if which == _T_STRING_END:
        # [elided]
        self.tokens.insert(0, string.join(chunks, ''))
        # [elided]

    # reset; _set_end records the close position when the table matches
    self.string_end = None
    # Entries of an mx.TextTools tag table for continuing an @-string;
    # the enclosing header (presumably "string_table = _tt.TagTable((")
    # and its closing parens are elided.
    (None, _tt.Is, '@', +3, +1),
    (_UNUSED, _tt.Is + _tt.AppendMatch, '@', +1, -1),
    (self._set_end, _tt.Skip + _tt.CallTag, 0, 0, _SUCCESS),
    (None, _tt.EOF, _tt.Here, +1, _SUCCESS),
    # suck up everything that isn't an AT. move to next rule to look
    # [elided: remainder of this comment]
    (_UNUSED, _tt.AllInSet + _tt.AppendMatch, _not_at_set, 0, +1),
    # go back to look for double AT if we aren't at the end of the string
    (None, _tt.EOF, _tt.Here, -5, _SUCCESS),
    # [elided: table closing]

    # NOTE(review): 'start' is not a parameter of this method; it must be
    # defined in elided lines — confirm against the full file.
    success, unused, idx = _tt.tag(buf, string_table,
                                   start, len(buf), chunks)

    # must have matched at least one item
    # [elided: presumably an assertion on success/idx]
    if self.string_end is None:
        # closing '@' never seen: the string spans past this chunk
        assert idx == len(buf)
        self.partial = (_T_STRING_SPAN, chunks)
    elif self.string_end < len(buf):
        # string closed inside this chunk
        self.tokens.insert(0, string.join(chunks, ''))
        # [elided]
    # [elided: presumably "else:" — string ended exactly at buffer end]
    self.partial = (_T_STRING_END, chunks)
def _parse_more(self):
    # Read the next chunk from the RCS file and tokenize it.
    buf = self.rcsfile.read(self.CHUNK_SIZE)
    # [elided: original lines 252-255 — presumably empty-read / EOF
    # handling; confirm against the full file]
    # finish any token or string left partial by the previous chunk
    idx = self._handle_partial(buf)
    # [elided: original lines 257-259 — presumably a branch on idx]
    self._parse_chunk(buf, idx)
    # [elided: presumably "else:"]
    self._parse_chunk(buf)
# NOTE(review): fragments of the token-consuming accessor method(s);
# the enclosing "def" line(s) and several statements (original lines
# 269-271, 274-276) are elided from this excerpt. The next token lives
# at the END of self.tokens.
return self.tokens.pop()
# [elided]
while not self.tokens:
    action = self._parse_more()
    # [elided: presumably a check of 'action' for end-of-file]
return self.tokens.pop()
def match(self, match):
    # Consume the next token and verify it equals *match*.
    # NOTE(review): original lines 287, 289, 292-295, 298 are elided —
    # including the guard before the pop and the "% (match, token))"
    # tails that complete both raise statements.
    token = self.tokens.pop()
    raise RuntimeError, ('Unexpected parsing error in RCS file.\n'
                         'Expected token: %s, but saw: %s'
    # [elided: format-argument tail]
    raise RuntimeError, ('Unexpected parsing error in RCS file.\n'
                         'Expected token: %s, but saw: %s'
    # [elided: format-argument tail]
def unget(self, token):
    """Push *token* back so it is the next one the stream yields."""
    # Tokens are consumed from the END of self.tokens, so pushing a
    # token back means appending it; += extends the list in place.
    self.tokens += [token]
def mget(self, count):
    "Return multiple tokens. 'next' is at the end."
    # parse more of the file until at least *count* tokens are queued
    while len(self.tokens) < count:
        action = self._parse_more()
        # [elided: original lines 307-308 — presumably a guard on
        # 'action' so the raise below only fires at end-of-file]
        raise RuntimeError, 'EOF hit while expecting tokens'
    # take the last *count* tokens; the 'next' token stays at the end
    result = self.tokens[-count:]
    del self.tokens[-count:]
    # [elided: presumably "return result"]
class Parser(common._Parser):
    """RCS parser wired to the mx.TextTools-based token stream."""
    # stream_class tells the common._Parser machinery which token-stream
    # implementation to instantiate.
    stream_class = _mxTokenStream