Inline method SVNCommitItem.has_keywords().
[cvs2svn.git] / cvs2svn_rcsparse / default.py
blobcb1171415039c845b63b3aa7c5e14896eed9a64d
1 # -*-python-*-
3 # Copyright (C) 1999-2008 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 # This file was originally based on portions of the blame.py script by
14 # Curt Hagenlocher.
16 # -----------------------------------------------------------------------
18 import string
19 import common
21 class _TokenStream:
22 token_term = string.whitespace + ';:'
24 # the algorithm is about the same speed for any CHUNK_SIZE chosen.
25 # grab a good-sized chunk, but not too large to overwhelm memory.
26 # note: we use a multiple of a standard block size
27 CHUNK_SIZE = 192 * 512 # about 100k
29 # CHUNK_SIZE = 5 # for debugging, make the function grind...
31 def __init__(self, file):
32 self.rcsfile = file
33 self.idx = 0
34 self.buf = self.rcsfile.read(self.CHUNK_SIZE)
35 if self.buf == '':
36 raise RuntimeError, 'EOF'
38 def get(self):
39 "Get the next token from the RCS file."
41 # Note: we can afford to loop within Python, examining individual
42 # characters. For the whitespace and tokens, the number of iterations
43 # is typically quite small. Thus, a simple iterative loop will beat
44 # out more complex solutions.
46 buf = self.buf
47 idx = self.idx
49 while 1:
50 if idx == len(buf):
51 buf = self.rcsfile.read(self.CHUNK_SIZE)
52 if buf == '':
53 # signal EOF by returning None as the token
54 del self.buf # so we fail if get() is called again
55 return None
56 idx = 0
58 if buf[idx] not in string.whitespace:
59 break
61 idx = idx + 1
63 if buf[idx] == ';' or buf[idx] == ':':
64 self.buf = buf
65 self.idx = idx + 1
66 return buf[idx]
68 if buf[idx] != '@':
69 end = idx + 1
70 token = ''
71 while 1:
72 # find token characters in the current buffer
73 while end < len(buf) and buf[end] not in self.token_term:
74 end = end + 1
75 token = token + buf[idx:end]
77 if end < len(buf):
78 # we stopped before the end, so we have a full token
79 idx = end
80 break
82 # we stopped at the end of the buffer, so we may have a partial token
83 buf = self.rcsfile.read(self.CHUNK_SIZE)
84 idx = end = 0
86 self.buf = buf
87 self.idx = idx
88 return token
90 # a "string" which starts with the "@" character. we'll skip it when we
91 # search for content.
92 idx = idx + 1
94 chunks = [ ]
96 while 1:
97 if idx == len(buf):
98 idx = 0
99 buf = self.rcsfile.read(self.CHUNK_SIZE)
100 if buf == '':
101 raise RuntimeError, 'EOF'
102 i = string.find(buf, '@', idx)
103 if i == -1:
104 chunks.append(buf[idx:])
105 idx = len(buf)
106 continue
107 if i == len(buf) - 1:
108 chunks.append(buf[idx:i])
109 idx = 0
110 buf = '@' + self.rcsfile.read(self.CHUNK_SIZE)
111 if buf == '@':
112 raise RuntimeError, 'EOF'
113 continue
114 if buf[i + 1] == '@':
115 chunks.append(buf[idx:i+1])
116 idx = i + 2
117 continue
119 chunks.append(buf[idx:i])
121 self.buf = buf
122 self.idx = i + 1
124 return string.join(chunks, '')
126 # _get = get
127 # def get(self):
128 token = self._get()
129 print 'T:', `token`
130 return token
132 def match(self, match):
133 "Try to match the next token from the input buffer."
135 token = self.get()
136 if token != match:
137 raise common.RCSExpected(token, match)
139 def unget(self, token):
140 "Put this token back, for the next get() to return."
142 # Override the class' .get method with a function which clears the
143 # overridden method then returns the pushed token. Since this function
144 # will not be looked up via the class mechanism, it should be a "normal"
145 # function, meaning it won't have "self" automatically inserted.
146 # Therefore, we need to pass both self and the token thru via defaults.
148 # note: we don't put this into the input buffer because it may have been
149 # @-unescaped already.
151 def give_it_back(self=self, token=token):
152 del self.get
153 return token
155 self.get = give_it_back
157 def mget(self, count):
158 "Return multiple tokens. 'next' is at the end."
159 result = [ ]
160 for i in range(count):
161 result.append(self.get())
162 result.reverse()
163 return result
class Parser(common._Parser):
    # Concrete parser: everything is inherited from common._Parser, and the
    # only thing configured here is the tokenizer class, _TokenStream.
    # NOTE(review): presumably common._Parser instantiates stream_class on
    # the file being parsed -- confirm in common.py.
    stream_class = _TokenStream