Refer to the filenames that are actually in the test repository.
[cvs2svn.git] / cvs2svn_rcsparse / common.py
blob006cc34014dd24754f4373763b49c84d1bae5f00
1 # -*-python-*-
3 # Copyright (C) 1999-2008 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 """common.py: common classes and functions for the RCS parsing tools."""
15 import calendar
16 import string
class Sink:
    """Callback interface that clients of the RCS parser implement.

    The parser invokes these methods while it works through an RCS file.
    Every method here is a do-nothing stub, so a subclass only has to
    override the callbacks it actually cares about.
    """

    def set_head_revision(self, revision):
        """Receive the head revision of this RCS file.

        The value comes from the 'head' header in the admin section of the
        RCS file.  It can only be reported before admin_completed().

        Parameter: REVISION is a string holding a revision number.  It is
        an actual revision number, never a branch number.
        """

    def set_principal_branch(self, branch_name):
        """Receive the principal branch of this RCS file.

        Only called when the principal branch is not trunk.  The value
        comes from the 'branch' header in the admin section of the RCS
        file.  It can only be reported before admin_completed().

        Parameter: BRANCH_NAME is a string holding a branch number.  When
        this callback fires the value is typically "1.1.1", which denotes
        the vendor branch.
        """

    def set_access(self, accessors):
        """Receive the access control list of this RCS file.

        Only called when an ACL is set.  If this callback never fires,
        there is no ACL and every user is allowed access.

        The value comes from the 'access' header in the admin section of
        the RCS file.  It can only be reported before admin_completed().

        Parameter: ACCESSORS is a list of strings, each one a username.
        A user is allowed access if and only if their username is in the
        list, OR the user owns the RCS file on disk, OR the user is root.

        Note that CVS typically doesn't use this field.
        """

    def define_tag(self, name, revision):
        """Receive one tag or branch definition.

        Called once for every tag or branch.  The data comes from the
        'symbols' header in the admin section of the RCS file.  It can
        only be reported before admin_completed().

        Parameters: NAME is a string holding the tag or branch name.
        REVISION is a string holding a revision number; it may be an
        actual revision number (for a tag) or a branch number.

        A revision number is a sequence of decimal components separated
        by dots.  There are three common forms.  An odd number of
        components means a branch.  Otherwise, a next-to-last component
        of zero also means a branch (that component is an artifact of CVS
        and should not be shown to the user).  Anything else is a tag.

        Calls arrive in the order in which the tags appear in the RCS
        file header.  For CVS that appears to be reverse chronological
        order of tag/branch creation.
        """

    def set_locker(self, revision, locker):
        """Receive one lock held on this RCS file.

        Called once for every lock.  The data comes from the 'locks'
        header in the admin section of the RCS file.  It can only be
        reported before admin_completed().

        Parameters: REVISION is a string holding a revision number.  It
        is an actual revision number, never a branch number.
        LOCKER is a string holding a username.
        """

    def set_locking(self, mode):
        """Receive notice that the RCS file uses strict locking.

        Called if and only if the RCS file is in strict locking mode.
        The data comes from the 'strict' header in the admin section of
        the RCS file.  It can only be reported before admin_completed().

        Parameters: MODE is always the string 'strict'.
        """

    def set_comment(self, comment):
        """Receive the comment of this RCS file.

        The value comes from the 'comment' header in the admin section of
        the RCS file.  It can only be reported before admin_completed().

        Parameter: COMMENT is a string holding the comment.  It may span
        several lines.

        This field does not seem to be used by CVS.
        """

    def set_expansion(self, mode):
        """Receive the keyword expansion mode of this RCS file.

        The value comes from the 'expand' header in the admin section of
        the RCS file.  It can only be reported before admin_completed().

        Parameter: MODE is a string holding the keyword expansion mode.
        Possible values include 'o' and 'b', amongst others.
        """

    def admin_completed(self):
        """Receive notice that the initial RCS header has been parsed.

        Called exactly once.
        """

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Receive the metadata of a single revision.

        Called once per revision, later than admin_completed() and
        earlier than tree_completed().

        Parameter: REVISION is a revision number, as a string.  It is an
        actual revision number, never a branch number.
        TIMESTAMP is the date and time at which the revision was created,
        as an integer number of seconds since the epoch (i.e. "UNIX time"
        format).
        AUTHOR is the author name, as a string.
        STATE is the state of the revision, as a string.  Common values
        are "Exp" and "dead".
        BRANCHES is a list of strings, each an actual revision number
        (not a branch number).  For every branch that is based on this
        revision and has commits, the revision number of the first branch
        commit is listed here.
        NEXT is either None or a string holding an actual revision number
        (not a branch number).

        On trunk, NEXT points at what humans might consider the
        'previous' revision number; for example, 1.3's NEXT is 1.2.
        On a branch, however, NEXT really does point at what humans would
        consider the 'next' revision number; for example, 1.1.2.1's NEXT
        would be 1.1.2.2.
        In other words, NEXT always means "where to find the next
        deltatext that you need this revision to retrieve".
        """

    def tree_completed(self):
        """Receive notice that the RCS revision tree has been parsed.

        Called exactly once, later than admin_completed().
        """

    def set_description(self, description):
        """Receive the description stored in the RCS file.

        The description is set with the "-m" flag of "cvs add".  Many CVS
        users don't use that option, so it is often empty.

        Called once, after tree_completed().

        Parameter: DESCRIPTION is a string holding the description.  It
        may span several lines.
        """

    def set_revision_info(self, revision, log, text):
        """Receive the log message and contents of one CVS revision.

        Called once per revision, later than set_description().

        Parameters: REVISION is a string holding the actual revision
        number.
        LOG is a string holding the log message.  It may span several
        lines.
        TEXT is the contents of the file in this revision, either as
        full-text or as a diff.  It is usually multi-line, and often
        quite large and/or binary.
        """

    def parse_completed(self):
        """Receive notice that parsing of the RCS file is complete.

        Called once.  After this call, nothing further is reported
        through this interface.
        """
215 # --------------------------------------------------------------------------
217 # EXCEPTIONS USED BY RCSPARSE
class RCSParseError(Exception):
    """Base class of the errors raised while parsing an RCS file."""


class RCSIllegalCharacter(RCSParseError):
    """Parse error for an illegal character in the RCS file."""


class RCSExpected(RCSParseError):
    """Parse error for a token that differs from the one required."""

    def __init__(self, got, wanted):
        """Build the error message.

        Parameters: GOT is the token that was actually read.
        WANTED is the token that was required at that position.
        """
        message = (
            'Unexpected parsing error in RCS file.\n'
            'Expected token: %s, but saw: %s'
        ) % (wanted, got)
        RCSParseError.__init__(self, message)


class RCSStopParser(Exception):
    """Exception that is deliberately not part of the RCSParseError family."""
242 # --------------------------------------------------------------------------
244 # STANDARD TOKEN STREAM-BASED PARSER
class _Parser:
    """Token-stream based RCS parser that reports what it reads to a Sink.

    Subclasses provide the tokenizer through the stream_class attribute.
    The parser calls get(), unget(token), match(token) and mget(count) on
    the stream instance.  get() yields None at end of file (see
    parse_rcs_deltatext()).
    """

    stream_class = None   # subclasses need to define this

    def _read_until_semicolon(self):
        """Read all tokens up to and including the next semicolon token.

        Return the tokens (not including the semicolon) as a list.
        Raise RCSExpected if the stream ends before a semicolon is seen.
        """
        tokens = []
        while 1:
            token = self.ts.get()
            if token == ';':
                break
            if token is None:
                # End of file: without this check a truncated file would
                # make the loop append None forever.
                raise RCSExpected(token, ';')
            tokens.append(token)
        return tokens

    def _parse_admin_head(self, token):
        """Handle the 'head' header; TOKEN is the keyword itself (unused)."""
        rev = self.ts.get()
        if rev == ';':
            # The head revision is not specified.  Just drop the semicolon
            # on the floor.
            pass
        else:
            self.sink.set_head_revision(rev)
            self.ts.match(';')

    def _parse_admin_branch(self, token):
        """Handle the 'branch' header; TOKEN is the keyword itself (unused)."""
        branch = self.ts.get()
        if branch != ';':
            self.sink.set_principal_branch(branch)
            self.ts.match(';')

    def _parse_admin_access(self, token):
        """Handle the 'access' header; only a non-empty list is reported."""
        accessors = self._read_until_semicolon()
        if accessors:
            self.sink.set_access(accessors)

    def _parse_admin_symbols(self, token):
        """Handle the 'symbols' header: a run of NAME ':' REVISION pairs."""
        while 1:
            tag_name = self.ts.get()
            if tag_name == ';':
                break
            self.ts.match(':')
            tag_rev = self.ts.get()
            self.sink.define_tag(tag_name, tag_rev)

    def _parse_admin_locks(self, token):
        """Handle the 'locks' header: a run of LOCKER ':' REVISION pairs."""
        while 1:
            locker = self.ts.get()
            if locker == ';':
                break
            self.ts.match(':')
            rev = self.ts.get()
            self.sink.set_locker(rev, locker)

    def _parse_admin_strict(self, token):
        """Handle the 'strict' header."""
        self.sink.set_locking("strict")
        self.ts.match(';')

    def _parse_admin_comment(self, token):
        """Handle the 'comment' header."""
        self.sink.set_comment(self.ts.get())
        self.ts.match(';')

    def _parse_admin_expand(self, token):
        """Handle the 'expand' header."""
        expand_mode = self.ts.get()
        self.sink.set_expansion(expand_mode)
        self.ts.match(';')

    # Maps an admin keyword to its handler.  The values are plain functions
    # (not bound methods), so they are called as f(self, token).  None
    # marks the keyword that ends the admin section.
    admin_token_map = {
        'head': _parse_admin_head,
        'branch': _parse_admin_branch,
        'access': _parse_admin_access,
        'symbols': _parse_admin_symbols,
        'locks': _parse_admin_locks,
        'strict': _parse_admin_strict,
        'comment': _parse_admin_comment,
        'expand': _parse_admin_expand,
        'desc': None,
    }

    def parse_rcs_admin(self):
        """Parse the admin section, dispatching each header to its handler.

        Returns when the first revision number or the 'desc' keyword is
        met; that token is pushed back onto the stream.
        """
        while 1:
            # Read initial token at beginning of line
            token = self.ts.get()

            try:
                handler = self.admin_token_map[token]
            except KeyError:
                # We're done once we reach the description of the RCS tree
                if token[0] in string.digits:
                    self.ts.unget(token)
                    return
                # Chew up "newphrase": skip the whole phrase up to its
                # semicolon, so that a value which starts with a digit
                # (e.g. "owner 640;") is not mistaken for the first
                # revision number.  A stray ';' is just dropped.
                if token != ';':
                    self._read_until_semicolon()
            else:
                if handler is None:
                    self.ts.unget(token)
                    return
                handler(self, token)

    def _parse_rcs_tree_entry(self, revision):
        """Parse the delta header of REVISION and report it to the sink.

        REVISION is the revision number token that was already read.
        Raises ValueError if the date has a year before 1970 or cannot be
        converted to a timestamp.
        """
        # Parse date
        self.ts.match('date')
        date = self.ts.get()
        self.ts.match(';')

        # Convert date into standard UNIX time format (seconds since epoch)
        date_fields = date.split('.')
        # According to rcsfile(5): the year "contains just the last two
        # digits of the year for years from 1900 through 1999, and all the
        # digits of years thereafter".
        if len(date_fields[0]) == 2:
            date_fields[0] = '19' + date_fields[0]
        date_fields = [int(field) for field in date_fields]
        EPOCH = 1970
        if date_fields[0] < EPOCH:
            raise ValueError('invalid year for revision %s' % (revision,))
        try:
            timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))
        except ValueError as e:
            raise ValueError(
                'invalid date for revision %s: %s' % (revision, e,))

        # Parse author
        ### NOTE: authors containing whitespace are violations of the
        ### RCS specification.  We are making an allowance here because
        ### CVSNT is known to produce these sorts of authors.
        self.ts.match('author')
        author = ' '.join(self._read_until_semicolon())

        # Parse state; several tokens are joined with single spaces.
        self.ts.match('state')
        state = ' '.join(self._read_until_semicolon())

        # Parse branches
        self.ts.match('branches')
        branches = self._read_until_semicolon()

        # Parse revision of next delta in chain
        self.ts.match('next')
        next_rev = self.ts.get()
        if next_rev == ';':
            next_rev = None
        else:
            self.ts.match(';')

        # there are some files with extra tags in them. for example:
        #    owner     640;
        #    group     15;
        #    permissions       644;
        #    hardlinks @configure.in@;
        #    commitid mLiHw3bulRjnTDGr;
        # this is "newphrase" in RCSFILE(5). we just want to skip over these.
        while 1:
            token = self.ts.get()
            if token == 'desc' or token[0] in string.digits:
                self.ts.unget(token)
                break
            # consume everything up to the semicolon
            self._read_until_semicolon()

        self.sink.define_revision(revision, timestamp, author, state,
                                  branches, next_rev)

    def parse_rcs_tree(self):
        """Parse every delta header up to (not including) 'desc'."""
        while 1:
            revision = self.ts.get()

            # End of RCS tree description ?
            if revision == 'desc':
                self.ts.unget(revision)
                return

            self._parse_rcs_tree_entry(revision)

    def parse_rcs_description(self):
        """Parse the 'desc' section and report the description."""
        self.ts.match('desc')
        self.sink.set_description(self.ts.get())

    def parse_rcs_deltatext(self):
        """Parse the deltatexts up to end of file and report each one.

        Raises RCSExpected if the 'log' or 'text' keyword is not where it
        should be.
        """
        while 1:
            revision = self.ts.get()
            if revision is None:
                # EOF
                break
            # The four values are unpacked in the reverse of the order in
            # which they occur in the file (log keyword, log, text keyword,
            # text).
            text, sym2, log, sym1 = self.ts.mget(4)
            if sym1 != 'log':
                # Diagnostic dump of the offending tokens on stdout.
                print(repr((text[:100], sym2[:100], log[:100], sym1[:100])))
                raise RCSExpected(sym1, 'log')
            if sym2 != 'text':
                raise RCSExpected(sym2, 'text')
            ### need to add code to chew up "newphrase"
            self.sink.set_revision_info(revision, log, text)

    def parse(self, file, sink):
        """Parse an RCS file.

        Parameters: FILE is the file object to parse.  (I.e. an object of
        the built-in Python type "file", usually created using Python's
        built-in "open()" function).
        SINK is an instance of (some subclass of) Sink.  Its methods will
        be called as the file is parsed; see the definition of Sink for
        the details.
        """
        self.ts = self.stream_class(file)
        self.sink = sink

        try:
            self.parse_rcs_admin()

            # let sink know when the admin section has been completed
            self.sink.admin_completed()

            self.parse_rcs_tree()

            # many sinks want to know when the tree has been completed so
            # they can do some work to prep for the arrival of the deltatext
            self.sink.tree_completed()

            self.parse_rcs_description()
            self.parse_rcs_deltatext()

            # easiest for us to tell the sink it is done, rather than worry
            # about higher level software doing it.
            self.sink.parse_completed()
        finally:
            # Drop the references even when parsing was aborted by an
            # exception.
            self.ts = self.sink = None
484 # --------------------------------------------------------------------------