3 # Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 # This file was originally based on portions of the blame.py script by
16 # -----------------------------------------------------------------------
# Characters that end a plain token: any whitespace character, plus the
# ';' and ':' delimiters.
token_term = string.whitespace + ';:'
24 # the algorithm is about the same speed for any CHUNK_SIZE chosen.
25 # grab a good-sized chunk, but not too large to overwhelm memory.
26 # note: we use a multiple of a standard block size
# Number of bytes requested from the file per read() call.
CHUNK_SIZE = 192 * 512  # about 100k
29 # CHUNK_SIZE = 5 # for debugging, make the function grind...
31 def __init__(self
, file):
34 self
.buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
36 raise RuntimeError, 'EOF'
39 "Get the next token from the RCS file."
41 # Note: we can afford to loop within Python, examining individual
42 # characters. For the whitespace and tokens, the number of iterations
43 # is typically quite small. Thus, a simple iterative loop will beat
44 # out more complex solutions.
51 buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
53 # signal EOF by returning None as the token
54 del self
.buf
# so we fail if get() is called again
58 if buf
[idx
] not in string
.whitespace
:
63 if buf
[idx
] == ';' or buf
[idx
] == ':':
72 # find token characters in the current buffer
73 while end
< len(buf
) and buf
[end
] not in self
.token_term
:
75 token
= token
+ buf
[idx
:end
]
78 # we stopped before the end, so we have a full token
82 # we stopped at the end of the buffer, so we may have a partial token
83 buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
90 # a "string" which starts with the "@" character. we'll skip it when we
99 buf
= self
.rcsfile
.read(self
.CHUNK_SIZE
)
101 raise RuntimeError, 'EOF'
102 i
= string
.find(buf
, '@', idx
)
104 chunks
.append(buf
[idx
:])
107 if i
== len(buf
) - 1:
108 chunks
.append(buf
[idx
:i
])
110 buf
= '@' + self
.rcsfile
.read(self
.CHUNK_SIZE
)
112 raise RuntimeError, 'EOF'
114 if buf
[i
+ 1] == '@':
115 chunks
.append(buf
[idx
:i
+1])
119 chunks
.append(buf
[idx
:i
])
124 return string
.join(chunks
, '')
132 def match(self
, match
):
133 "Try to match the next token from the input buffer."
137 raise common
.RCSExpected(token
, match
)
139 def unget(self
, token
):
140 "Put this token back, for the next get() to return."
142 # Override the class' .get method with a function which clears the
143 # overridden method then returns the pushed token. Since this function
144 # will not be looked up via the class mechanism, it should be a "normal"
145 # function, meaning it won't have "self" automatically inserted.
146 # Therefore, we need to pass both self and the token through via defaults.
148 # note: we don't put this into the input buffer because it may have been
149 # @-unescaped already.
151 def give_it_back(self
=self
, token
=token
):
155 self
.get
= give_it_back
157 def mget(self
, count
):
158 "Return multiple tokens. 'next' is at the end."
160 for i
in range(count
):
161 result
.append(self
.get())
class Parser(common._Parser):
    "common._Parser subclass that uses _TokenStream as its stream class."

    # Token stream class for this parser; how the base class uses it is
    # defined in common._Parser, which is not shown here.
    stream_class = _TokenStream