Update to r1393290 of svntest.
[cvs2svn.git] / cvs2svn_rcsparse / default.py
blob5face4cab90d34b8eb1fb4ec10eabd55b3f8c318
1 # -*-python-*-
3 # Copyright (C) 1999-2008 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 # This file was originally based on portions of the blame.py script by
14 # Curt Hagenlocher.
16 # -----------------------------------------------------------------------
18 import string
19 import common
class _TokenStream:
    """Chunked tokenizer for RCS files.

    The stream is read CHUNK_SIZE characters at a time from an object
    providing read(size).  get() hands back one token per call:

      * ';' or ':' as single-character tokens,
      * an "@"-delimited string with the delimiters removed and every
        doubled "@@" collapsed to a single "@",
      * otherwise a run of characters up to (not including) the next
        whitespace, ';' or ':'.
    """

    # Characters that terminate a plain (non-"@") token.
    token_term = string.whitespace + ";:"
    try:
        token_term = frozenset(token_term)
    except NameError:
        # no frozenset builtin available; keep using the plain string
        pass

    # the algorithm is about the same speed for any CHUNK_SIZE chosen.
    # grab a good-sized chunk, but not too large to overwhelm memory.
    # note: we use a multiple of a standard block size
    CHUNK_SIZE = 192 * 512  # about 100k

    # CHUNK_SIZE = 5  # for debugging, make the function grind...

    def __init__(self, file):
        """Attach to `file` and load the first chunk into the buffer.

        `file` must provide read(size) returning a string, '' at EOF.

        Raises RuntimeError('EOF') when the very first read is empty.
        """
        self.rcsfile = file
        self.idx = 0
        self.buf = self.rcsfile.read(self.CHUNK_SIZE)
        if self.buf == '':
            raise RuntimeError('EOF')

    def get(self):
        """Get the next token from the RCS file.

        Returns the token as a string, or None once the input is
        exhausted.  After None has been returned the buffer attribute is
        deleted, so a further call fails instead of returning None again.

        Raises RuntimeError('EOF') if the input ends inside an "@" string.
        A closing "@" that is the very last character of the input is also
        reported this way, because the character after it is needed to
        tell a terminator from an escaped "@@".
        """

        # Note: we can afford to loop within Python, examining individual
        # characters. For the whitespace and tokens, the number of
        # iterations is typically quite small. Thus, a simple iterative
        # loop will beat out more complex solutions.

        buf = self.buf
        lbuf = len(buf)
        idx = self.idx

        # skip leading whitespace, refilling the buffer as needed
        while True:
            if idx == lbuf:
                buf = self.rcsfile.read(self.CHUNK_SIZE)
                if buf == '':
                    # signal EOF by returning None as the token
                    del self.buf  # so we fail if get() is called again
                    return None
                lbuf = len(buf)
                idx = 0

            if buf[idx] not in string.whitespace:
                break

            idx = idx + 1

        first = buf[idx]

        if first in ';:':
            self.buf = buf
            self.idx = idx + 1
            return first

        if first != '@':
            token_term = self.token_term
            pieces = []
            end = idx + 1
            while True:
                # find token characters in the current buffer
                while end < lbuf and buf[end] not in token_term:
                    end = end + 1
                pieces.append(buf[idx:end])

                if end < lbuf:
                    # we stopped before the end, so we have a full token
                    idx = end
                    break

                # we stopped at the end of the buffer, so we may have a
                # partial token
                buf = self.rcsfile.read(self.CHUNK_SIZE)
                lbuf = len(buf)
                idx = end = 0
                if buf == '':
                    # EOF ends the token too.  Without this check the loop
                    # would keep reading an exhausted file forever.  The
                    # next get() sees the empty buffer and returns None.
                    break

            self.buf = buf
            self.idx = idx
            return ''.join(pieces)

        # a "string" which starts with the "@" character. we'll skip it
        # when we search for content.
        idx = idx + 1

        chunks = []

        while True:
            if idx == lbuf:
                idx = 0
                buf = self.rcsfile.read(self.CHUNK_SIZE)
                if buf == '':
                    raise RuntimeError('EOF')
                lbuf = len(buf)
            i = buf.find('@', idx)
            if i == -1:
                # no "@" in the rest of this buffer: it is all content
                chunks.append(buf[idx:])
                idx = lbuf
                continue
            if i == lbuf - 1:
                # the "@" is the last buffered character; carry it over to
                # the front of the next chunk so the character following
                # it can be examined
                chunks.append(buf[idx:i])
                idx = 0
                buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
                if buf == '@':
                    raise RuntimeError('EOF')
                lbuf = len(buf)
                continue
            if buf[i + 1] == '@':
                # "@@" is an escaped "@": keep one and skip the other
                chunks.append(buf[idx:i + 1])
                idx = i + 2
                continue

            # a lone "@" terminates the string
            chunks.append(buf[idx:i])

            self.buf = buf
            self.idx = i + 1

            return ''.join(chunks)

    def match(self, match):
        """Try to match the next token from the input buffer.

        Consumes one token and raises common.RCSExpected(token, match)
        when it differs from `match`.
        """

        token = self.get()
        if token != match:
            raise common.RCSExpected(token, match)

    def unget(self, token):
        """Put this token back, for the next get() to return.

        Only one token is held: a second unget() before the next get()
        replaces the previously pushed token.
        """

        # Override the class' .get method with a function which clears the
        # overridden method then returns the pushed token. Since this
        # function will not be looked up via the class mechanism, it should
        # be a "normal" function, meaning it won't have "self" automatically
        # inserted. Therefore, we need to pass both self and the token thru
        # via defaults.

        # note: we don't put this into the input buffer because it may have
        # been @-unescaped already.

        def give_it_back(self=self, token=token):
            del self.get
            return token

        self.get = give_it_back

    def mget(self, count):
        """Return multiple tokens. 'next' is at the end.

        Reads `count` tokens with get() and returns them as a list in
        reverse reading order: the first token read is the last element.
        """
        result = [self.get() for _ in range(count)]
        result.reverse()
        return result
class Parser(common._Parser):
    """common._Parser subclass whose stream_class is _TokenStream."""

    # NOTE(review): presumably common._Parser instantiates this class to
    # tokenize its input -- confirm against the common module.
    stream_class = _TokenStream