3 # Copyright (C) 1999-2011 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 """common.py: common classes and functions for the RCS parsing tools."""
19 """Interface to be implemented by clients. The RCS parser calls this as
20 it parses the RCS file.
22 All these methods have stub implementations that do nothing, so you only
23 have to override the callbacks that you care about.
25 def set_head_revision(self
, revision
):
26 """Reports the head revision for this RCS file.
28 This is the value of the 'head' header in the admin section of the RCS
29 file. This function can only be called before admin_completed().
31 Parameter: REVISION is a string containing a revision number. This is
32 an actual revision number, not a branch number.
36 def set_principal_branch(self
, branch_name
):
37 """Reports the principal branch for this RCS file. This is only called
38 if the principal branch is not trunk.
40 This is the value of the 'branch' header in the admin section of the RCS
41 file. This function can only be called before admin_completed().
43 Parameter: BRANCH_NAME is a string containing a branch number. If this
44 function is called, the parameter is typically "1.1.1", indicating the
49 def set_access(self
, accessors
):
50 """Reports the access control list for this RCS file. This function is
51 only called if the ACL is set. If this function is not called then
52 there is no ACL and all users are allowed access.
54 This is the value of the 'access' header in the admin section of the RCS
55 file. This function can only be called before admin_completed().
57 Parameter: ACCESSORS is a list of strings. Each string is a username.
58 The user is allowed access if and only if their username is in the list,
59 OR the user owns the RCS file on disk, OR the user is root.
61 Note that CVS typically doesn't use this field.
65 def define_tag(self
, name
, revision
):
66 """Reports a tag or branch definition. This function will be called
67 once for each tag or branch.
69 This is taken from the 'symbols' header in the admin section of the RCS
70 file. This function can only be called before admin_completed().
72 Parameters: NAME is a string containing the tag or branch name.
73 REVISION is a string containing a revision number. This may be
74 an actual revision number (for a tag) or a branch number.
76 The revision number consists of a number of decimal components separated
77 by dots. There are three common forms. If there are an odd number of
78 components, it's a branch. Otherwise, if the next-to-last component is
79 zero, it's a branch (and the next-to-last component is an artifact of
80 CVS and should not be shown to the user). Otherwise, it's a tag.
82 This function is called in the order that the tags appear in the RCS
83 file header. For CVS, this appears to be in reverse chronological
84 order of tag/branch creation.
88 def set_locker(self
, revision
, locker
):
89 """Reports a lock on this RCS file. This function will be called once
92 This is taken from the 'locks' header in the admin section of the RCS
93 file. This function can only be called before admin_completed().
95 Parameters: REVISION is a string containing a revision number. This is
96 an actual revision number, not a branch number.
97 LOCKER is a string containing a username.
101 def set_locking(self
, mode
):
102 """Signals strict locking mode. This function will be called if and
103 only if the RCS file is in strict locking mode.
105 This is taken from the 'strict' header in the admin section of the RCS
106 file. This function can only be called before admin_completed().
108 Parameters: MODE is always the string 'strict'.
112 def set_comment(self
, comment
):
113 """Reports the comment for this RCS file.
115 This is the value of the 'comment' header in the admin section of the
116 RCS file. This function can only be called before admin_completed().
118 Parameter: COMMENT is a string containing the comment. This may be
121 This field does not seem to be used by CVS.
125 def set_expansion(self
, mode
):
126 """Reports the keyword expansion mode for this RCS file.
128 This is the value of the 'expand' header in the admin section of the
129 RCS file. This function can only be called before admin_completed().
131 Parameter: MODE is a string containing the keyword expansion mode.
132 Possible values include 'o' and 'b', amongst others.
136 def admin_completed(self
):
137 """Reports that the initial RCS header has been parsed. This function is
142 def define_revision(self
, revision
, timestamp
, author
, state
,
144 """Reports metadata about a single revision.
146 This function is called for each revision. It is called later than
147 admin_completed() and earlier than tree_completed().
149 Parameter: REVISION is a revision number, as a string. This is an
150 actual revision number, not a branch number.
151 TIMESTAMP is the date and time that the revision was created, as an
152 integer number of seconds since the epoch. (I.e. "UNIX time" format).
153 AUTHOR is the author name, as a string.
154 STATE is the state of the revision, as a string. Common values are
156 BRANCHES is a list of strings, with each string being an actual
157 revision number (not a branch number). For each branch which is based
158 on this revision and has commits, the revision number of the first
159 branch commit is listed here.
160 NEXT is either None or a string representing an actual revision number
161 (not a branch number).
163 When on trunk, NEXT points to what humans might consider to be the
164 'previous' revision number. For example, 1.3's NEXT is 1.2.
165 However, on a branch, NEXT really does point to what humans would
166 consider to be the 'next' revision number. For example, 1.1.2.1's
167 NEXT would be 1.1.2.2.
168 In other words, NEXT always means "where to find the next deltatext
169 that you need this revision to retrieve".
173 def tree_completed(self
):
174 """Reports that the RCS revision tree has been parsed. This function is
175 called exactly once. This function will be called later than
180 def set_description(self
, description
):
181 """Reports the description from the RCS file. This is set using the
182 "-m" flag to "cvs add". However, many CVS users don't use that option,
183 so this is often empty.
185 This function is called once, after tree_completed().
187 Parameter: DESCRIPTION is a string containing the description. This may
192 def set_revision_info(self
, revision
, log
, text
):
193 """Reports the log message and contents of a CVS revision.
195 This function is called for each revision. It is called later than
198 Parameters: REVISION is a string containing the actual revision number.
199 LOG is a string containing the log message. This may be multi-line.
200 TEXT is the contents of the file in this revision, either as full-text or
201 as a diff. This is usually multi-line, and often quite large and/or
206 def parse_completed(self
):
207 """Reports that parsing an RCS file is complete.
209 This function is called once. After it is called, no more calls will be
210 made via this interface.
215 # --------------------------------------------------------------------------
217 # EXCEPTIONS USED BY RCSPARSE
220 class RCSParseError(Exception):
224 class RCSIllegalCharacter(RCSParseError
):
228 class RCSExpected(RCSParseError
):
229 def __init__(self
, got
, wanted
):
230 RCSParseError
.__init
__(
232 'Unexpected parsing error in RCS file.\n'
233 'Expected token: %s, but saw: %s'
238 class RCSStopParser(Exception):
242 # --------------------------------------------------------------------------
244 # STANDARD TOKEN STREAM-BASED PARSER
248 stream_class
= None # subclasses need to define this
250 def _read_until_semicolon(self
):
251 """Read all tokens up to and including the next semicolon token.
253 Return the tokens (not including the semicolon) as a list."""
258 token
= self
.ts
.get()
265 def _parse_admin_head(self
, token
):
268 # The head revision is not specified. Just drop the semicolon
272 self
.sink
.set_head_revision(rev
)
275 def _parse_admin_branch(self
, token
):
276 branch
= self
.ts
.get()
278 self
.sink
.set_principal_branch(branch
)
281 def _parse_admin_access(self
, token
):
282 accessors
= self
._read
_until
_semicolon
()
284 self
.sink
.set_access(accessors
)
286 def _parse_admin_symbols(self
, token
):
288 tag_name
= self
.ts
.get()
292 tag_rev
= self
.ts
.get()
293 self
.sink
.define_tag(tag_name
, tag_rev
)
295 def _parse_admin_locks(self
, token
):
297 locker
= self
.ts
.get()
302 self
.sink
.set_locker(rev
, locker
)
304 def _parse_admin_strict(self
, token
):
305 self
.sink
.set_locking("strict")
308 def _parse_admin_comment(self
, token
):
309 self
.sink
.set_comment(self
.ts
.get())
312 def _parse_admin_expand(self
, token
):
313 expand_mode
= self
.ts
.get()
314 self
.sink
.set_expansion(expand_mode
)
318 'head' : _parse_admin_head
,
319 'branch' : _parse_admin_branch
,
320 'access' : _parse_admin_access
,
321 'symbols' : _parse_admin_symbols
,
322 'locks' : _parse_admin_locks
,
323 'strict' : _parse_admin_strict
,
324 'comment' : _parse_admin_comment
,
325 'expand' : _parse_admin_expand
,
329 def parse_rcs_admin(self
):
331 # Read initial token at beginning of line
332 token
= self
.ts
.get()
335 f
= self
.admin_token_map
[token
]
337 # We're done once we reach the description of the RCS tree
338 if token
[0] in string
.digits
:
342 # Chew up "newphrase"
343 # warn("Unexpected RCS token: $token\n")
344 while self
.ts
.get() != ';':
353 def _parse_rcs_tree_entry(self
, revision
):
355 self
.ts
.match('date')
359 # Convert date into standard UNIX time format (seconds since epoch)
360 date_fields
= string
.split(date
, '.')
361 # According to rcsfile(5): the year "contains just the last two
362 # digits of the year for years from 1900 through 1999, and all the
363 # digits of years thereafter".
364 if len(date_fields
[0]) == 2:
365 date_fields
[0] = '19' + date_fields
[0]
366 date_fields
= map(string
.atoi
, date_fields
)
368 if date_fields
[0] < EPOCH
:
369 raise ValueError, 'invalid year for revision %s' % (revision
,)
371 timestamp
= calendar
.timegm(tuple(date_fields
) + (0, 0, 0,))
372 except ValueError, e
:
373 raise ValueError, 'invalid date for revision %s: %s' % (revision
, e
,)
376 ### NOTE: authors containing whitespace are violations of the
377 ### RCS specification. We are making an allowance here because
378 ### CVSNT is known to produce these sorts of authors.
379 self
.ts
.match('author')
380 author
= ' '.join(self
._read
_until
_semicolon
())
383 self
.ts
.match('state')
386 token
= self
.ts
.get()
389 state
= state
+ token
+ ' '
390 state
= state
[:-1] # toss the trailing space
393 self
.ts
.match('branches')
394 branches
= self
._read
_until
_semicolon
()
396 # Parse revision of next delta in chain
397 self
.ts
.match('next')
404 # there are some files with extra tags in them. for example:
408 # hardlinks @configure.in@;
409 # commitid mLiHw3bulRjnTDGr;
410 # this is "newphrase" in RCSFILE(5). we just want to skip over these.
412 token
= self
.ts
.get()
413 if token
== 'desc' or token
[0] in string
.digits
:
416 # consume everything up to the semicolon
417 self
._read
_until
_semicolon
()
419 self
.sink
.define_revision(revision
, timestamp
, author
, state
, branches
,
422 def parse_rcs_tree(self
):
424 revision
= self
.ts
.get()
426 # End of RCS tree description ?
427 if revision
== 'desc':
428 self
.ts
.unget(revision
)
431 self
._parse
_rcs
_tree
_entry
(revision
)
433 def parse_rcs_description(self
):
434 self
.ts
.match('desc')
435 self
.sink
.set_description(self
.ts
.get())
437 def parse_rcs_deltatext(self
):
439 revision
= self
.ts
.get()
443 text
, sym2
, log
, sym1
= self
.ts
.mget(4)
445 print `text
[:100], sym2
[:100], log
[:100], sym1
[:100]`
446 raise RCSExpected(sym1
, 'log')
448 raise RCSExpected(sym2
, 'text')
449 ### need to add code to chew up "newphrase"
450 self
.sink
.set_revision_info(revision
, log
, text
)
452 def parse(self
, file, sink
):
453 """Parse an RCS file.
455 Parameters: FILE is the file object to parse. (I.e. an object of the
456 built-in Python type "file", usually created using Python's built-in
458 SINK is an instance of (some subclass of) Sink. Its methods will be
459 called as the file is parsed; see the definition of Sink for the
462 self
.ts
= self
.stream_class(file)
465 self
.parse_rcs_admin()
467 # let sink know when the admin section has been completed
468 self
.sink
.admin_completed()
470 self
.parse_rcs_tree()
472 # many sinks want to know when the tree has been completed so they can
473 # do some work to prep for the arrival of the deltatext
474 self
.sink
.tree_completed()
476 self
.parse_rcs_description()
477 self
.parse_rcs_deltatext()
479 # easiest for us to tell the sink it is done, rather than worry about
480 # higher level software doing it.
481 self
.sink
.parse_completed()
483 self
.ts
= self
.sink
= None
485 # --------------------------------------------------------------------------