Use ctx.tmpdir consistently in cvs2git-example.options.
[cvs2svn.git] / cvs2svn_rcsparse / default.py
blob24825e9a6fea0e7c95956e2a464d4b26d9c21234
1 # -*-python-*-
3 # Copyright (C) 1999-2014 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 # This file was originally based on portions of the blame.py script by
14 # Curt Hagenlocher.
16 # -----------------------------------------------------------------------
18 import string
19 import common
21 class _TokenStream:
22 token_term = string.whitespace + ";:"
23 try:
24 token_term = frozenset(token_term)
25 except NameError:
26 pass
28 # the algorithm is about the same speed for any CHUNK_SIZE chosen.
29 # grab a good-sized chunk, but not too large to overwhelm memory.
30 # note: we use a multiple of a standard block size
31 CHUNK_SIZE = 192 * 512 # about 100k
33 # CHUNK_SIZE = 5 # for debugging, make the function grind...
35 def __init__(self, file):
36 self.rcsfile = file
37 self.idx = 0
38 self.buf = self.rcsfile.read(self.CHUNK_SIZE)
39 if self.buf == '':
40 raise RuntimeError, 'EOF'
42 def get(self):
43 "Get the next token from the RCS file."
45 # Note: we can afford to loop within Python, examining individual
46 # characters. For the whitespace and tokens, the number of iterations
47 # is typically quite small. Thus, a simple iterative loop will beat
48 # out more complex solutions.
50 buf = self.buf
51 lbuf = len(buf)
52 idx = self.idx
54 while 1:
55 if idx == lbuf:
56 buf = self.rcsfile.read(self.CHUNK_SIZE)
57 if buf == '':
58 # signal EOF by returning None as the token
59 del self.buf # so we fail if get() is called again
60 return None
61 lbuf = len(buf)
62 idx = 0
64 if buf[idx] not in string.whitespace:
65 break
67 idx = idx + 1
69 if buf[idx] in ';:':
70 self.buf = buf
71 self.idx = idx + 1
72 return buf[idx]
74 if buf[idx] != '@':
75 end = idx + 1
76 token = ''
77 while 1:
78 # find token characters in the current buffer
79 while end < lbuf and buf[end] not in self.token_term:
80 end = end + 1
81 token = token + buf[idx:end]
83 if end < lbuf:
84 # we stopped before the end, so we have a full token
85 idx = end
86 break
88 # we stopped at the end of the buffer, so we may have a partial token
89 buf = self.rcsfile.read(self.CHUNK_SIZE)
90 if buf == '':
91 # signal EOF by returning None as the token
92 del self.buf # so we fail if get() is called again
93 return None
94 lbuf = len(buf)
95 idx = end = 0
97 self.buf = buf
98 self.idx = idx
99 return token
101 # a "string" which starts with the "@" character. we'll skip it when we
102 # search for content.
103 idx = idx + 1
105 chunks = [ ]
107 while 1:
108 if idx == lbuf:
109 idx = 0
110 buf = self.rcsfile.read(self.CHUNK_SIZE)
111 if buf == '':
112 raise RuntimeError, 'EOF'
113 lbuf = len(buf)
114 i = string.find(buf, '@', idx)
115 if i == -1:
116 chunks.append(buf[idx:])
117 idx = lbuf
118 continue
119 if i == lbuf - 1:
120 chunks.append(buf[idx:i])
121 idx = 0
122 buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
123 if buf == '@':
124 raise RuntimeError, 'EOF'
125 lbuf = len(buf)
126 continue
127 if buf[i + 1] == '@':
128 chunks.append(buf[idx:i+1])
129 idx = i + 2
130 continue
132 chunks.append(buf[idx:i])
134 self.buf = buf
135 self.idx = i + 1
137 return string.join(chunks, '')
139 # _get = get
140 # def get(self):
141 token = self._get()
142 print 'T:', `token`
143 return token
145 def match(self, match):
146 "Try to match the next token from the input buffer."
148 token = self.get()
149 if token != match:
150 raise common.RCSExpected(token, match)
152 def unget(self, token):
153 "Put this token back, for the next get() to return."
155 # Override the class' .get method with a function which clears the
156 # overridden method then returns the pushed token. Since this function
157 # will not be looked up via the class mechanism, it should be a "normal"
158 # function, meaning it won't have "self" automatically inserted.
159 # Therefore, we need to pass both self and the token thru via defaults.
161 # note: we don't put this into the input buffer because it may have been
162 # @-unescaped already.
164 def give_it_back(self=self, token=token):
165 del self.get
166 return token
168 self.get = give_it_back
170 def mget(self, count):
171 "Return multiple tokens. 'next' is at the end."
172 result = [ ]
173 for i in range(count):
174 result.append(self.get())
175 result.reverse()
176 return result
class Parser(common._Parser):
    """Parser derived from common._Parser that uses _TokenStream as its
    token stream class."""

    stream_class = _TokenStream