cvs2git-example.options: Fix typo and formatting in comment.
[cvs2svn.git] / cvs2svn_rcsparse / common.py
bloba4911f1abd09dbde3b3b8e8ec9ea72922f93a192
1 # -*-python-*-
3 # Copyright (C) 1999-2011 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 """common.py: common classes and functions for the RCS parsing tools."""
15 import calendar
16 import string
class Sink:
    """Callback interface through which the RCS parser reports what it reads.

    Clients derive from this class and override the callbacks they are
    interested in.  Every method defined here is a stub that does nothing,
    so callbacks that are not needed can simply be left alone.
    """

    def set_head_revision(self, revision):
        """Receive the head revision of the RCS file.

        The value comes from the 'head' header of the admin section, so this
        callback can only arrive before admin_completed().

        Parameter: REVISION is a string holding a real revision number; it
        is never a branch number.
        """

    def set_principal_branch(self, branch_name):
        """Receive the principal branch of the RCS file.

        Only called when the principal branch is something other than trunk.
        The value comes from the 'branch' header of the admin section, so
        this callback can only arrive before admin_completed().

        Parameter: BRANCH_NAME is a string holding a branch number.  When
        the callback is made at all, the value is typically "1.1.1", i.e.
        the vendor branch.
        """

    def set_access(self, accessors):
        """Receive the access control list of the RCS file.

        Only called when an ACL is set; if the callback never arrives there
        is no ACL and every user is allowed access.  The value comes from
        the 'access' header of the admin section, so this callback can only
        arrive before admin_completed().

        Parameter: ACCESSORS is a list of strings, each one a username.  A
        user is allowed access if and only if their username is in the list,
        OR they own the RCS file on disk, OR they are root.

        Note that CVS typically doesn't use this field.
        """

    def define_tag(self, name, revision):
        """Receive one tag or branch definition.

        Called once per tag or branch, using the 'symbols' header of the
        admin section; it can therefore only arrive before
        admin_completed().

        Parameters: NAME is a string holding the tag or branch name.
        REVISION is a string holding a revision number, which may be a real
        revision number (for a tag) or a branch number.

        A revision number is a sequence of decimal components separated by
        dots, and comes in three common forms.  An odd number of components
        means a branch.  Otherwise, a next-to-last component of zero also
        means a branch (that zero is a CVS artifact and should not be shown
        to the user).  Anything else is a tag.

        Calls are made in the order in which the tags appear in the RCS file
        header.  For CVS that appears to be reverse chronological order of
        tag/branch creation.
        """

    def set_locker(self, revision, locker):
        """Receive one lock held on the RCS file.

        Called once per lock, using the 'locks' header of the admin section;
        it can therefore only arrive before admin_completed().

        Parameters: REVISION is a string holding a real revision number
        (not a branch number).
        LOCKER is a string holding a username.
        """

    def set_locking(self, mode):
        """Be told that the RCS file uses strict locking.

        Called if and only if the file is in strict locking mode, as shown
        by the 'strict' header of the admin section; it can therefore only
        arrive before admin_completed().

        Parameters: MODE is always the string 'strict'.
        """

    def set_comment(self, comment):
        """Receive the comment of the RCS file.

        The value comes from the 'comment' header of the admin section, so
        this callback can only arrive before admin_completed().

        Parameter: COMMENT is a string holding the comment, which may span
        several lines.

        This field does not seem to be used by CVS.
        """

    def set_expansion(self, mode):
        """Receive the keyword expansion mode of the RCS file.

        The value comes from the 'expand' header of the admin section, so
        this callback can only arrive before admin_completed().

        Parameter: MODE is a string holding the keyword expansion mode.
        Possible values include 'o' and 'b', amongst others.
        """

    def admin_completed(self):
        """Be told that the initial RCS header has been parsed.

        Called exactly once.
        """

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Receive the metadata of a single revision.

        Called once per revision, later than admin_completed() and earlier
        than tree_completed().

        Parameter: REVISION is a revision number, as a string.  It is a real
        revision number, not a branch number.
        TIMESTAMP is the date and time at which the revision was created, as
        an integer number of seconds since the epoch (i.e. "UNIX time").
        AUTHOR is the author name, as a string.
        STATE is the state of the revision, as a string.  Common values are
        "Exp" and "dead".
        BRANCHES is a list of strings, each a real revision number (not a
        branch number).  For every branch that is based on this revision
        and has commits, the revision number of its first commit is listed.
        NEXT is either None or a string holding a real revision number (not
        a branch number).

        On trunk, NEXT points at what a human would call the 'previous'
        revision: 1.3's NEXT is 1.2.  On a branch, NEXT really is what a
        human would call the 'next' revision: 1.1.2.1's NEXT would be
        1.1.2.2.  Put differently, NEXT always means "where to find the
        next deltatext that you need this revision to retrieve".
        """

    def tree_completed(self):
        """Be told that the RCS revision tree has been parsed.

        Called exactly once, later than admin_completed().
        """

    def set_description(self, description):
        """Receive the description stored in the RCS file.

        The description is set with the "-m" flag of "cvs add".  Many CVS
        users never use that option, so the value is often empty.

        Called once, after tree_completed().

        Parameter: DESCRIPTION is a string holding the description, which
        may span several lines.
        """

    def set_revision_info(self, revision, log, text):
        """Receive the log message and the contents of a CVS revision.

        Called once per revision, later than set_description().

        Parameters: REVISION is a string holding the real revision number.
        LOG is a string holding the log message, which may span several
        lines.
        TEXT is the content of the file in this revision, either as
        full-text or as a diff.  It usually spans many lines and is often
        quite large and/or binary.
        """

    def parse_completed(self):
        """Be told that parsing of the RCS file is complete.

        Called once.  No further calls are made through this interface
        afterwards.
        """
215 # --------------------------------------------------------------------------
217 # EXCEPTIONS USED BY RCSPARSE
class RCSParseError(Exception):
    """Base class of the errors raised for malformed RCS files."""


class RCSIllegalCharacter(RCSParseError):
    """Parse error reserved for an illegal character in the RCS file."""


class RCSExpected(RCSParseError):
    """Parse error raised when a token other than the required one is seen."""

    def __init__(self, got, wanted):
        # GOT is the token that was actually read, WANTED the one that the
        # grammar required at that point.
        message = (
            'Unexpected parsing error in RCS file.\n'
            'Expected token: %s, but saw: %s'
        ) % (wanted, got)
        RCSParseError.__init__(self, message)


class RCSStopParser(Exception):
    """Exception that is deliberately not an RCSParseError.

    NOTE(review): the name suggests a sink raises it to abort parsing
    early; nothing in this file raises or catches it, so confirm against
    the callers.
    """
242 # --------------------------------------------------------------------------
244 # STANDARD TOKEN STREAM-BASED PARSER
class _Parser:
    """Parser for RCS files that works on a stream of tokens.

    This class contains the grammar only; the tokenizer is supplied by a
    subclass through the STREAM_CLASS attribute.  Judging from the calls
    made here, a stream object has to offer get(), unget(token),
    match(token) and mget(count), and get() returns None at end of file.

    Everything that is recognized is reported by calling the corresponding
    method of a Sink.
    """

    stream_class = None   # subclasses need to define this

    def _read_until_semicolon(self):
        """Read all tokens up to and including the next semicolon token.

        Return the tokens (not including the semicolon) as a list."""

        tokens = []

        while 1:
            token = self.ts.get()
            if token == ';':
                break
            tokens.append(token)

        return tokens

    # The _parse_admin_* methods below each handle one phrase of the admin
    # section.  They are called with the keyword (TOKEN) already consumed
    # and have to consume the rest of the phrase, including its ';'.

    def _parse_admin_head(self, token):
        """Handle 'head': report the head revision, if one is given."""
        rev = self.ts.get()
        if rev == ';':
            # The head revision is not specified.  Just drop the semicolon
            # on the floor.
            pass
        else:
            self.sink.set_head_revision(rev)
            self.ts.match(';')

    def _parse_admin_branch(self, token):
        """Handle 'branch': report the principal branch, if one is given."""
        branch = self.ts.get()
        if branch != ';':
            self.sink.set_principal_branch(branch)
            self.ts.match(';')

    def _parse_admin_access(self, token):
        """Handle 'access': report the ACL unless the list is empty."""
        accessors = self._read_until_semicolon()
        if accessors:
            self.sink.set_access(accessors)

    def _parse_admin_symbols(self, token):
        """Handle 'symbols': report every 'name:revision' pair as a tag."""
        while 1:
            tag_name = self.ts.get()
            if tag_name == ';':
                break
            self.ts.match(':')
            tag_rev = self.ts.get()
            self.sink.define_tag(tag_name, tag_rev)

    def _parse_admin_locks(self, token):
        """Handle 'locks': report every 'locker:revision' pair as a lock."""
        while 1:
            locker = self.ts.get()
            if locker == ';':
                break
            self.ts.match(':')
            rev = self.ts.get()
            self.sink.set_locker(rev, locker)

    def _parse_admin_strict(self, token):
        """Handle 'strict': report strict locking mode."""
        self.sink.set_locking("strict")
        self.ts.match(';')

    def _parse_admin_comment(self, token):
        """Handle 'comment': report the comment string."""
        self.sink.set_comment(self.ts.get())
        self.ts.match(';')

    def _parse_admin_expand(self, token):
        """Handle 'expand': report the keyword expansion mode."""
        expand_mode = self.ts.get()
        self.sink.set_expansion(expand_mode)
        self.ts.match(';')

    # Maps the keyword that starts an admin phrase to its handler.  The
    # values are plain functions (not bound methods), so parse_rcs_admin()
    # passes SELF explicitly.  None marks a keyword that ends the admin
    # section.
    admin_token_map = {
        'head': _parse_admin_head,
        'branch': _parse_admin_branch,
        'access': _parse_admin_access,
        'symbols': _parse_admin_symbols,
        'locks': _parse_admin_locks,
        'strict': _parse_admin_strict,
        'comment': _parse_admin_comment,
        'expand': _parse_admin_expand,
        'desc': None,
        }

    def parse_rcs_admin(self):
        """Parse the admin section, reporting each known phrase to the sink.

        Parsing stops, with the terminating token pushed back onto the
        stream, at 'desc' or at the first token that starts with a digit
        (the first revision number).  Phrases with an unknown keyword are
        skipped.
        """
        while 1:
            # Read initial token at beginning of line
            token = self.ts.get()

            try:
                f = self.admin_token_map[token]
            except KeyError:
                # We're done once we reach the description of the RCS tree
                if token[0] in string.digits:
                    self.ts.unget(token)
                    return
                else:
                    # Chew up "newphrase"
                    # warn("Unexpected RCS token: $token\n")
                    while self.ts.get() != ';':
                        pass
            else:
                if f is None:
                    self.ts.unget(token)
                    return
                else:
                    f(self, token)

    def _parse_rcs_tree_entry(self, revision):
        """Parse the delta that follows REVISION and report it to the sink.

        REVISION is the revision number token, which the caller has already
        consumed.  The date, author, state, branches and next phrases are
        read in that order, any further ("newphrase") phrases are skipped,
        and the result is passed to sink.define_revision().

        Raises ValueError if the year lies before 1970 or if the date
        cannot be converted into a timestamp.
        """
        # Imported locally because sys is not among the module imports.
        import sys

        # Parse date
        self.ts.match('date')
        date = self.ts.get()
        self.ts.match(';')

        # Convert date into standard UNIX time format (seconds since epoch)
        date_fields = date.split('.')
        # According to rcsfile(5): the year "contains just the last two
        # digits of the year for years from 1900 through 1999, and all the
        # digits of years thereafter".
        if len(date_fields[0]) == 2:
            date_fields[0] = '19' + date_fields[0]
        date_fields = [int(field) for field in date_fields]
        EPOCH = 1970
        if date_fields[0] < EPOCH:
            raise ValueError('invalid year for revision %s' % (revision,))
        try:
            timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))
        except ValueError:
            # sys.exc_info() is used instead of "except ValueError, e" or
            # "except ValueError as e" because it is the only spelling
            # that is accepted by every Python version.
            e = sys.exc_info()[1]
            raise ValueError(
                'invalid date for revision %s: %s' % (revision, e,))

        # Parse author
        ### NOTE: authors containing whitespace are violations of the
        ### RCS specification.  We are making an allowance here because
        ### CVSNT is known to produce these sorts of authors.
        self.ts.match('author')
        author = ' '.join(self._read_until_semicolon())

        # Parse state.  Multiple tokens are joined with single spaces, in
        # the same way as for the author.
        self.ts.match('state')
        state = ' '.join(self._read_until_semicolon())

        # Parse branches
        self.ts.match('branches')
        branches = self._read_until_semicolon()

        # Parse revision of next delta in chain
        self.ts.match('next')
        next = self.ts.get()
        if next == ';':
            next = None
        else:
            self.ts.match(';')

        # there are some files with extra tags in them. for example:
        #    owner       640;
        #    group       15;
        #    permissions 644;
        #    hardlinks   @configure.in@;
        #    commitid    mLiHw3bulRjnTDGr;
        # this is "newphrase" in RCSFILE(5). we just want to skip over these.
        while 1:
            token = self.ts.get()
            if token == 'desc' or token[0] in string.digits:
                self.ts.unget(token)
                break
            # consume everything up to the semicolon
            self._read_until_semicolon()

        self.sink.define_revision(revision, timestamp, author, state,
                                  branches, next)

    def parse_rcs_tree(self):
        """Parse all deltas of the revision tree.

        Stops at the 'desc' token, which is pushed back onto the stream.
        """
        while 1:
            revision = self.ts.get()

            # End of RCS tree description ?
            if revision == 'desc':
                self.ts.unget(revision)
                return

            self._parse_rcs_tree_entry(revision)

    def parse_rcs_description(self):
        """Parse the 'desc' phrase and report the description to the sink."""
        self.ts.match('desc')
        self.sink.set_description(self.ts.get())

    def parse_rcs_deltatext(self):
        """Parse the deltatext entries up to the end of the file.

        Each entry is reported through sink.set_revision_info().  Raises
        RCSExpected if the 'log' or the 'text' keyword is not where it is
        supposed to be.
        """
        while 1:
            revision = self.ts.get()
            if revision is None:
                # EOF
                break
            # mget() hands back the four tokens in reverse order of
            # appearance, as the comparisons against 'log' and 'text' show.
            text, sym2, log, sym1 = self.ts.mget(4)
            if sym1 != 'log':
                # Diagnostic output kept from the original code; the
                # parenthesized form prints the same text on Python 2.
                print(repr((text[:100], sym2[:100], log[:100], sym1[:100])))
                raise RCSExpected(sym1, 'log')
            if sym2 != 'text':
                raise RCSExpected(sym2, 'text')
            ### need to add code to chew up "newphrase"
            self.sink.set_revision_info(revision, log, text)

    def parse(self, file, sink):
        """Parse an RCS file.

        Parameters: FILE is the file object to parse.  (I.e. an object of
        the built-in Python type "file", usually created using Python's
        built-in "open()" function).
        SINK is an instance of (some subclass of) Sink.  Its methods will be
        called as the file is parsed; see the definition of Sink for the
        details.

        The references to the token stream and to the sink are dropped when
        this method returns, whether parsing succeeded or an exception is
        being propagated.
        """
        self.ts = self.stream_class(file)
        self.sink = sink

        try:
            self.parse_rcs_admin()

            # let sink know when the admin section has been completed
            self.sink.admin_completed()

            self.parse_rcs_tree()

            # many sinks want to know when the tree has been completed so
            # they can do some work to prep for the arrival of the deltatext
            self.sink.tree_completed()

            self.parse_rcs_description()
            self.parse_rcs_deltatext()

            # easiest for us to tell the sink it is done, rather than worry
            # about higher level software doing it.
            self.sink.parse_completed()
        finally:
            # Also runs when a sink callback or a parse error aborts the
            # run, so that the stream and the sink are not kept alive.
            self.ts = self.sink = None
485 # --------------------------------------------------------------------------