Fix references to the --write-symbol-info option.
[cvs2svn.git] / cvs2svn_rcsparse / common.py
blob3eed6004a8916bfad7a5fcdce63b694052e86184
1 # -*-python-*-
3 # Copyright (C) 1999-2006 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 """common.py: common classes and functions for the RCS parsing tools."""
15 import calendar
16 import string
class Sink:
    """Receiver for the events produced while an RCS file is parsed.

    Every callback here does nothing and returns None, so a subclass only
    has to override the events it cares about.
    """

    def set_head_revision(self, revision):
        """Receive the head revision named in the admin section."""

    def set_principal_branch(self, branch_name):
        """Receive the principal branch named in the admin section."""

    def set_access(self, accessors):
        """Receive the list of tokens from a non-empty access phrase."""

    def define_tag(self, name, revision):
        """Receive one name/revision pair from the symbols phrase."""

    def set_locker(self, revision, locker):
        """Receive one entry of the locks phrase."""

    def set_locking(self, mode):
        """Used to signal locking mode.

        Called with mode argument 'strict' if strict locking.
        Not called when no locking used.
        """

    def set_comment(self, comment):
        """Receive the value of the comment phrase."""

    def set_expansion(self, mode):
        """Receive the value of the expand phrase."""

    def admin_completed(self):
        """Signal that the admin section has been parsed completely."""

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Receive the delta-tree entry of one revision."""

    def tree_completed(self):
        """Signal that the delta tree has been parsed completely."""

    def set_description(self, description):
        """Receive the description that follows the 'desc' keyword."""

    def set_revision_info(self, revision, log, text):
        """Receive the log message and text of one revision."""

    def parse_completed(self):
        """Signal that the whole file has been parsed."""
68 # --------------------------------------------------------------------------
70 # EXCEPTIONS USED BY RCSPARSE
class RCSParseError(Exception):
    """Base class of the errors raised for malformed RCS input."""


class RCSIllegalCharacter(RCSParseError):
    """Parse error subclass.

    NOTE(review): presumably raised by a tokenizer for a character that
    is not allowed in an RCS file -- it is not raised in this section.
    """


class RCSExpected(RCSParseError):
    """Raised when the parser sees a different token than it requires.

    Attributes:
      got: the token that was actually read.
      wanted: the token that was required at that point.
    """

    def __init__(self, got, wanted):
        RCSParseError.__init__(
            self,
            'Unexpected parsing error in RCS file.\n'
            'Expected token: %s, but saw: %s'
            % (wanted, got)
        )
        # Keep the raw values so callers need not parse the message.
        self.got = got
        self.wanted = wanted


class RCSStopParser(Exception):
    """Deliberately not an RCSParseError: derives directly from Exception.

    NOTE(review): presumably raised by a sink to stop parsing early --
    confirm against the callers; nothing in this section raises it.
    """
95 # --------------------------------------------------------------------------
97 # STANDARD TOKEN STREAM-BASED PARSER
class _Parser:
    """Token-stream based parser for RCS files.

    Subclasses must set ``stream_class`` to a tokenizer class that is
    instantiated with the file passed to ``parse()``.  The parser uses four
    operations of that stream: ``get()`` (next token; ``None`` is treated
    as end of input), ``unget(token)``, ``match(token)`` and
    ``mget(count)``.  Everything that is parsed is reported to the sink
    passed to ``parse()``.
    """

    stream_class = None   # subclasses need to define this

    def _read_until_semicolon(self):
        """Read all tokens up to and including the next semicolon token.

        Return the tokens (not including the semicolon) as a list.
        Raise RCSExpected if the input ends before a semicolon is seen.
        """
        tokens = []
        while 1:
            token = self.ts.get()
            if token == ';':
                return tokens
            if token is None:
                # End of input: without this check the loop never ends.
                raise RCSExpected(token, ';')
            tokens.append(token)

    def _parse_admin_head(self, token):
        """Handle 'head': report the head revision if one is given."""
        rev = self.ts.get()
        if rev == ';':
            # The head revision is not specified.  Just drop the semicolon
            # on the floor.
            pass
        else:
            self.sink.set_head_revision(rev)
            self.ts.match(';')

    def _parse_admin_branch(self, token):
        """Handle 'branch': report the principal branch if one is given."""
        branch = self.ts.get()
        if branch != ';':
            self.sink.set_principal_branch(branch)
            self.ts.match(';')

    def _parse_admin_access(self, token):
        """Handle 'access': report the accessor list unless it is empty."""
        accessors = self._read_until_semicolon()
        if accessors:
            self.sink.set_access(accessors)

    def _parse_admin_symbols(self, token):
        """Handle 'symbols': report each ``name : revision`` pair."""
        while 1:
            tag_name = self.ts.get()
            if tag_name == ';':
                break
            self.ts.match(':')
            tag_rev = self.ts.get()
            self.sink.define_tag(tag_name, tag_rev)

    def _parse_admin_locks(self, token):
        """Handle 'locks': report each ``locker : revision`` pair."""
        while 1:
            locker = self.ts.get()
            if locker == ';':
                break
            self.ts.match(':')
            rev = self.ts.get()
            self.sink.set_locker(rev, locker)

    def _parse_admin_strict(self, token):
        """Handle 'strict': report strict locking."""
        self.sink.set_locking("strict")
        self.ts.match(';')

    def _parse_admin_comment(self, token):
        """Handle 'comment': report the comment value."""
        self.sink.set_comment(self.ts.get())
        self.ts.match(';')

    def _parse_admin_expand(self, token):
        """Handle 'expand': report the keyword expansion mode."""
        expand_mode = self.ts.get()
        self.sink.set_expansion(expand_mode)
        self.ts.match(';')

    # Maps an admin keyword to its handler.  The values are plain
    # functions, so they are called as handler(self, token).  A value of
    # None marks the keyword that ends the admin section.
    admin_token_map = {
        'head' : _parse_admin_head,
        'branch' : _parse_admin_branch,
        'access' : _parse_admin_access,
        'symbols' : _parse_admin_symbols,
        'locks' : _parse_admin_locks,
        'strict' : _parse_admin_strict,
        'comment' : _parse_admin_comment,
        'expand' : _parse_admin_expand,
        'desc' : None,
        }

    def parse_rcs_admin(self):
        """Parse the admin section and report each phrase to the sink.

        Returns after pushing back the token that ends the section: either
        the first token starting with a digit (a revision number) or the
        'desc' keyword.  Raise RCSExpected if the input ends first.
        """
        while 1:
            # Read initial token at beginning of line
            token = self.ts.get()

            try:
                handler = self.admin_token_map[token]
            except KeyError:
                if token is None:
                    # End of input inside the admin section.
                    raise RCSExpected(token, 'desc')
                # We're done once we reach the description of the RCS tree
                if token[0] in string.digits:
                    self.ts.unget(token)
                    return
                # Chew up "newphrase".  The whole phrase has to be consumed
                # here: otherwise its value tokens would be looked up as
                # keywords, and a value starting with a digit would be
                # mistaken for the first revision of the tree.
                self._read_until_semicolon()
            else:
                if handler is None:
                    self.ts.unget(token)
                    return
                handler(self, token)

    def _parse_rcs_tree_entry(self, revision):
        """Parse the delta-tree entry of `revision` and report it.

        The entry consists of the date, author, state, branches and next
        phrases, optionally followed by "newphrase" extensions which are
        skipped.  Raise ValueError for a year before 1970.
        """
        # Parse date
        self.ts.match('date')
        date = self.ts.get()
        self.ts.match(';')

        # Convert date into timestamp
        date_fields = date.split('.')
        # According to rcsfile(5): the year "contains just the last two
        # digits of the year for years from 1900 through 1999, and all the
        # digits of years thereafter".
        if len(date_fields[0]) == 2:
            date_fields[0] = '19' + date_fields[0]
        date_fields = [int(field) for field in date_fields]
        EPOCH = 1970
        if date_fields[0] < EPOCH:
            raise ValueError('invalid year')
        timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))

        # Parse author
        ### NOTE: authors containing whitespace are violations of the
        ### RCS specification.  We are making an allowance here because
        ### CVSNT is known to produce these sorts of authors.
        self.ts.match('author')
        author = ' '.join(self._read_until_semicolon())

        # Parse state (tokens joined by single spaces, like the author)
        self.ts.match('state')
        state = ' '.join(self._read_until_semicolon())

        # Parse branches
        self.ts.match('branches')
        branches = self._read_until_semicolon()

        # Parse revision of next delta in chain
        self.ts.match('next')
        next_rev = self.ts.get()
        if next_rev == ';':
            next_rev = None
        else:
            self.ts.match(';')

        # there are some files with extra tags in them. for example:
        #    owner 640;
        #    group 15;
        #    permissions 644;
        #    hardlinks @configure.in@;
        # this is "newphrase" in RCSFILE(5). we just want to skip over
        # these.
        while 1:
            token = self.ts.get()
            if token is None:
                # End of input before the description was reached.
                raise RCSExpected(token, 'desc')
            if token == 'desc' or token[0] in string.digits:
                self.ts.unget(token)
                break
            # consume everything up to the semicolon
            self._read_until_semicolon()

        self.sink.define_revision(revision, timestamp, author, state,
                                  branches, next_rev)

    def parse_rcs_tree(self):
        """Parse delta-tree entries until the 'desc' keyword is reached.

        The 'desc' token is pushed back onto the stream before returning.
        """
        while 1:
            revision = self.ts.get()

            # End of RCS tree description ?
            if revision == 'desc':
                self.ts.unget(revision)
                return

            self._parse_rcs_tree_entry(revision)

    def parse_rcs_description(self):
        """Parse the 'desc' phrase and report the description."""
        self.ts.match('desc')
        self.sink.set_description(self.ts.get())

    def parse_rcs_deltatext(self):
        """Parse the deltatext entries up to the end of the input.

        Each entry is a revision followed by 'log', the log message,
        'text' and the text.  Raise RCSExpected if either keyword is not
        where it is required.
        """
        while 1:
            revision = self.ts.get()
            if revision is None:
                # EOF
                break
            # mget() hands back the four tokens last-read first.
            text, sym2, log, sym1 = self.ts.mget(4)
            if sym1 != 'log':
                print(repr((text[:100], sym2[:100], log[:100], sym1[:100])))
                raise RCSExpected(sym1, 'log')
            if sym2 != 'text':
                raise RCSExpected(sym2, 'text')
            ### need to add code to chew up "newphrase"
            self.sink.set_revision_info(revision, log, text)

    def parse(self, file, sink):
        """Parse `file` completely, reporting everything to `sink`.

        The sections are parsed in file order: admin, delta tree,
        description, deltatext.  The sink is notified after the admin
        section, after the tree and at the very end.
        """
        self.ts = self.stream_class(file)
        self.sink = sink

        try:
            self.parse_rcs_admin()

            # let sink know when the admin section has been completed
            self.sink.admin_completed()

            self.parse_rcs_tree()

            # many sinks want to know when the tree has been completed so
            # they can do some work to prep for the arrival of the
            # deltatext
            self.sink.tree_completed()

            self.parse_rcs_description()
            self.parse_rcs_deltatext()

            # easiest for us to tell the sink it is done, rather than worry
            # about higher level software doing it.
            self.sink.parse_completed()
        finally:
            # Drop the references even when parsing is aborted by an
            # exception, so the stream and the sink are not kept alive.
            self.ts = self.sink = None
324 # --------------------------------------------------------------------------