Document how various VCSs handle keywords, EOLs, and file permissions.
[cvs2svn.git] / cvs2svn_rcsparse / common.py
blobca9cff9b9e04440058657f2283238cbe0604a6a9
1 # -*-python-*-
3 # Copyright (C) 1999-2008 The ViewCVS Group. All Rights Reserved.
5 # By using this file, you agree to the terms and conditions set forth in
6 # the LICENSE.html file which can be found at the top level of the ViewVC
7 # distribution or at http://viewvc.org/license-1.html.
9 # For more information, visit http://viewvc.org/
11 # -----------------------------------------------------------------------
13 """common.py: common classes and functions for the RCS parsing tools."""
15 import calendar
16 import string
class Sink:
    """Callback interface through which the RCS parser reports its findings.

    The parser invokes these methods while it works through an RCS file.
    Every method here is a do-nothing stub, so a client subclasses Sink and
    overrides only the callbacks it is interested in.
    """

    def set_head_revision(self, revision):
        """Receive the head revision of the RCS file.

        The value comes from the 'head' header of the admin section, so this
        callback can only happen before admin_completed().

        Parameter: REVISION is a string holding a revision number.  It is
        always an actual revision number, never a branch number.
        """

    def set_principal_branch(self, branch_name):
        """Receive the principal branch of the RCS file.

        Only called when the principal branch is something other than trunk.

        The value comes from the 'branch' header of the admin section, so
        this callback can only happen before admin_completed().

        Parameter: BRANCH_NAME is a string holding a branch number.  When
        this callback fires the value is typically "1.1.1", i.e. the vendor
        branch.
        """

    def set_access(self, accessors):
        """Receive the access control list of the RCS file.

        Only called when an ACL is present.  If this callback never fires,
        there is no ACL and every user is allowed access.

        The value comes from the 'access' header of the admin section, so
        this callback can only happen before admin_completed().

        Parameter: ACCESSORS is a list of strings, each one a username.  A
        user is granted access if and only if their username is in the list,
        OR they own the RCS file on disk, OR they are root.

        Note that CVS typically doesn't use this field.
        """

    def define_tag(self, name, revision):
        """Receive one tag or branch definition.

        Called once per tag or branch.  The data comes from the 'symbols'
        header of the admin section, so this callback can only happen before
        admin_completed().

        Parameters: NAME is a string holding the tag or branch name.
        REVISION is a string holding a revision number, which may be an
        actual revision number (for a tag) or a branch number.

        A revision number is a sequence of decimal components separated by
        dots, and there are three common forms:

          * an odd number of components: a branch;
          * otherwise, a next-to-last component of zero: also a branch (the
            zero is an artifact of CVS and should not be shown to the user);
          * otherwise: a tag.

        Calls arrive in the order in which the tags appear in the RCS file
        header.  For CVS this appears to be reverse chronological order of
        tag/branch creation.
        """

    def set_locker(self, revision, locker):
        """Receive one lock held on the RCS file.

        Called once per lock.  The data comes from the 'locks' header of the
        admin section, so this callback can only happen before
        admin_completed().

        Parameters: REVISION is a string holding a revision number; it is an
        actual revision number, not a branch number.
        LOCKER is a string holding a username.
        """

    def set_locking(self, mode):
        """Be told that the RCS file uses strict locking.

        Called if and only if the RCS file is in strict locking mode.  The
        data comes from the 'strict' header of the admin section, so this
        callback can only happen before admin_completed().

        Parameters: MODE is always the string 'strict'.
        """

    def set_comment(self, comment):
        """Receive the comment of the RCS file.

        The value comes from the 'comment' header of the admin section, so
        this callback can only happen before admin_completed().

        Parameter: COMMENT is a string holding the comment, which may span
        several lines.

        This field does not seem to be used by CVS.
        """

    def set_expansion(self, mode):
        """Receive the keyword expansion mode of the RCS file.

        The value comes from the 'expand' header of the admin section, so
        this callback can only happen before admin_completed().

        Parameter: MODE is a string holding the keyword expansion mode.
        Possible values include 'o' and 'b', amongst others.
        """

    def admin_completed(self):
        """Be told that the initial RCS header has been parsed.

        Called exactly once.
        """

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Receive the metadata of a single revision.

        Called once per revision, later than admin_completed() and earlier
        than tree_completed().

        Parameter: REVISION is a revision number, as a string.  It is an
        actual revision number, not a branch number.
        TIMESTAMP is the date and time at which the revision was created, as
        an integer number of seconds since the epoch ("UNIX time" format).
        AUTHOR is the author name, as a string.
        STATE is the state of the revision, as a string.  Common values are
        "Exp" and "dead".
        BRANCHES is a list of strings, each one an actual revision number
        (not a branch number).  For every branch that is based on this
        revision and has commits, the revision number of the first commit on
        that branch is listed.
        NEXT is either None or a string holding an actual revision number
        (not a branch number).

        On trunk, NEXT refers to what a human would call the 'previous'
        revision: 1.3's NEXT is 1.2.  On a branch, NEXT really is the 'next'
        revision: 1.1.2.1's NEXT would be 1.1.2.2.  Put differently, NEXT
        always means "where to find the next deltatext that you need this
        revision to retrieve".
        """

    def tree_completed(self):
        """Be told that the RCS revision tree has been parsed.

        Called exactly once, later than admin_completed().
        """

    def set_description(self, description):
        """Receive the description stored in the RCS file.

        The description is set with the "-m" flag to "cvs add".  Many CVS
        users don't use that option, so it is often empty.

        Called once, after tree_completed().

        Parameter: DESCRIPTION is a string holding the description, which
        may span several lines.
        """

    def set_revision_info(self, revision, log, text):
        """Receive the log message and contents of one CVS revision.

        Called once per revision, later than set_description().

        Parameters: REVISION is a string holding the actual revision number.
        LOG is a string holding the log message, which may span several
        lines.
        TEXT is the content of the file in this revision, either as
        full-text or as a diff.  It is usually multi-line, and often quite
        large and/or binary.
        """

    def parse_completed(self):
        """Be told that parsing of the RCS file has finished.

        Called once.  No further calls are made through this interface
        afterwards.
        """
215 # --------------------------------------------------------------------------
217 # EXCEPTIONS USED BY RCSPARSE
class RCSParseError(Exception):
    """Base class of the errors rcsparse reports for an RCS file."""
class RCSIllegalCharacter(RCSParseError):
    """Parse error subclass.

    NOTE(review): nothing in this section raises it; judging by the name it
    reports a character that is not allowed in an RCS file -- confirm against
    the tokenizer that raises it.
    """
class RCSExpected(RCSParseError):
    """Parse error raised when one token was required but another was read."""

    def __init__(self, got, wanted):
        """Build the error message.

        Parameters: GOT is the token that was actually read.
        WANTED is the token that was required at that point.
        """
        message = ('Unexpected parsing error in RCS file.\n'
                   'Expected token: %s, but saw: %s') % (wanted, got)
        RCSParseError.__init__(self, message)
class RCSStopParser(Exception):
    """Exception that deliberately does not derive from RCSParseError.

    NOTE(review): nothing in this section raises or catches it; presumably a
    sink raises it to make the parser stop early -- confirm against callers.
    """
242 # --------------------------------------------------------------------------
244 # STANDARD TOKEN STREAM-BASED PARSER
class _Parser:
    """Token-stream based RCS parser that reports what it reads to a Sink.

    A concrete parser is a subclass that sets `stream_class` to a token
    stream type.  parse() instantiates it with the file object; the
    resulting stream must provide get(), unget(token), match(token) and
    mget(count).  get() is expected to return None at end of file.
    """

    stream_class = None  # subclasses need to define this

    def _read_until_semicolon(self):
        """Read all tokens up to and including the next semicolon token.

        Return the tokens (not including the semicolon) as a list.

        Raises RCSExpected if the file ends before a semicolon is seen."""

        tokens = []

        while True:
            token = self.ts.get()
            if token == ';':
                return tokens
            if token is None:
                # End of file: report it as a parse error instead of
                # collecting None "tokens".
                raise RCSExpected(token, ';')
            tokens.append(token)

    def _parse_admin_head(self, token):
        """Handle the 'head' admin phrase; TOKEN is the keyword itself."""
        rev = self.ts.get()
        if rev == ';':
            # The head revision is not specified.  Just drop the semicolon
            # on the floor.
            return
        self.sink.set_head_revision(rev)
        self.ts.match(';')

    def _parse_admin_branch(self, token):
        """Handle the 'branch' admin phrase; TOKEN is the keyword itself."""
        branch = self.ts.get()
        if branch != ';':
            self.sink.set_principal_branch(branch)
            self.ts.match(';')

    def _parse_admin_access(self, token):
        """Handle the 'access' admin phrase; TOKEN is the keyword itself.

        The sink is only told about a non-empty access list."""
        accessors = self._read_until_semicolon()
        if accessors:
            self.sink.set_access(accessors)

    def _parse_admin_symbols(self, token):
        """Handle the 'symbols' admin phrase: a run of NAME ':' REV pairs."""
        while True:
            tag_name = self.ts.get()
            if tag_name == ';':
                break
            self.ts.match(':')
            tag_rev = self.ts.get()
            self.sink.define_tag(tag_name, tag_rev)

    def _parse_admin_locks(self, token):
        """Handle the 'locks' admin phrase: a run of LOCKER ':' REV pairs."""
        while True:
            locker = self.ts.get()
            if locker == ';':
                break
            self.ts.match(':')
            rev = self.ts.get()
            self.sink.set_locker(rev, locker)

    def _parse_admin_strict(self, token):
        """Handle the 'strict' admin phrase; TOKEN is the keyword itself."""
        self.sink.set_locking("strict")
        self.ts.match(';')

    def _parse_admin_comment(self, token):
        """Handle the 'comment' admin phrase; TOKEN is the keyword itself."""
        self.sink.set_comment(self.ts.get())
        self.ts.match(';')

    def _parse_admin_expand(self, token):
        """Handle the 'expand' admin phrase; TOKEN is the keyword itself."""
        expand_mode = self.ts.get()
        self.sink.set_expansion(expand_mode)
        self.ts.match(';')

    # Admin keyword -> handler.  The handlers are stored as plain functions
    # and therefore called as handler(self, token).  A value of None marks
    # the keyword that ends the admin section.
    admin_token_map = {
        'head': _parse_admin_head,
        'branch': _parse_admin_branch,
        'access': _parse_admin_access,
        'symbols': _parse_admin_symbols,
        'locks': _parse_admin_locks,
        'strict': _parse_admin_strict,
        'comment': _parse_admin_comment,
        'expand': _parse_admin_expand,
        'desc': None,
        }

    def parse_rcs_admin(self):
        """Parse the admin section, reporting each known phrase to the sink.

        Returns with the token stream positioned on the first token that
        follows the admin section (a revision number or 'desc'); that token
        is pushed back with unget().

        Raises RCSExpected if the file ends inside the admin section."""
        while True:
            # Read initial token at beginning of line
            token = self.ts.get()
            if token is None:
                raise RCSExpected(token, 'desc')

            try:
                handler = self.admin_token_map[token]
            except KeyError:
                # We're done once we reach the description of the RCS tree
                if token[0] in string.digits:
                    self.ts.unget(token)
                    return
                # Chew up "newphrase": skip the rest of the phrase, through
                # its semicolon, so that its words are not mistaken for
                # admin keywords or for the first revision of the tree.
                self._read_until_semicolon()
            else:
                if handler is None:
                    self.ts.unget(token)
                    return
                handler(self, token)

    def _parse_rcs_tree_entry(self, revision):
        """Parse the delta entry of REVISION and report it to the sink.

        Reads the 'date', 'author', 'state', 'branches' and 'next' phrases,
        skips any further phrases, and calls sink.define_revision().

        Raises ValueError if the year is before 1970 or a date field is not
        an integer, and RCSExpected if the file ends inside the entry."""
        # Parse date
        self.ts.match('date')
        date = self.ts.get()
        self.ts.match(';')

        # Convert date into standard UNIX time format (seconds since epoch)
        date_fields = date.split('.')
        # According to rcsfile(5): the year "contains just the last two
        # digits of the year for years from 1900 through 1999, and all the
        # digits of years thereafter".
        if len(date_fields[0]) == 2:
            date_fields[0] = '19' + date_fields[0]
        date_fields = [int(field) for field in date_fields]
        EPOCH = 1970
        if date_fields[0] < EPOCH:
            raise ValueError('invalid year')
        timestamp = calendar.timegm(tuple(date_fields) + (0, 0, 0,))

        # Parse author
        ### NOTE: authors containing whitespace are violations of the
        ### RCS specification.  We are making an allowance here because
        ### CVSNT is known to produce these sorts of authors.
        self.ts.match('author')
        author = ' '.join(self._read_until_semicolon())

        # Parse state (joined with single spaces, like the author)
        self.ts.match('state')
        state = ' '.join(self._read_until_semicolon())

        # Parse branches
        self.ts.match('branches')
        branches = self._read_until_semicolon()

        # Parse revision of next delta in chain
        self.ts.match('next')
        next_rev = self.ts.get()
        if next_rev == ';':
            next_rev = None
        else:
            self.ts.match(';')

        # there are some files with extra tags in them. for example:
        #    owner      640;
        #    group      15;
        #    permissions        644;
        #    hardlinks  @configure.in@;
        #    commitid   mLiHw3bulRjnTDGr;
        # this is "newphrase" in RCSFILE(5). we just want to skip over these.
        while True:
            token = self.ts.get()
            if token is None:
                raise RCSExpected(token, 'desc')
            if token == 'desc' or token[0] in string.digits:
                self.ts.unget(token)
                break
            # consume everything up to the semicolon
            self._read_until_semicolon()

        self.sink.define_revision(revision, timestamp, author, state,
                                  branches, next_rev)

    def parse_rcs_tree(self):
        """Parse every delta entry up to, but not including, 'desc'."""
        while True:
            revision = self.ts.get()

            # End of RCS tree description ?
            if revision == 'desc':
                self.ts.unget(revision)
                return

            self._parse_rcs_tree_entry(revision)

    def parse_rcs_description(self):
        """Parse the 'desc' phrase and report the description to the sink."""
        self.ts.match('desc')
        self.sink.set_description(self.ts.get())

    def parse_rcs_deltatext(self):
        """Parse the deltatexts until end of file, reporting each to the sink.

        Raises RCSExpected if the 'log' or 'text' keyword is not where it is
        required."""
        while True:
            revision = self.ts.get()
            if revision is None:
                # EOF
                break
            # The unpacking order relies on mget() handing the tokens back
            # last-read-first, so that the keyword checks below line up --
            # NOTE(review): confirm against the token stream class.
            text, sym2, log, sym1 = self.ts.mget(4)
            if sym1 != 'log':
                # Diagnostic dump of the (truncated) tokens on stdout, as
                # before; written so it is valid on Python 2 and Python 3.
                print(repr((text[:100], sym2[:100], log[:100], sym1[:100])))
                raise RCSExpected(sym1, 'log')
            if sym2 != 'text':
                raise RCSExpected(sym2, 'text')
            ### need to add code to chew up "newphrase"
            self.sink.set_revision_info(revision, log, text)

    def parse(self, file, sink):
        """Parse an RCS file.

        Parameters: FILE is the file object to parse.  (I.e. an object of the
        built-in Python type "file", usually created using Python's built-in
        "open()" function).
        SINK is an instance of (some subclass of) Sink.  Its methods will be
        called as the file is parsed; see the definition of Sink for the
        details.
        """
        self.ts = self.stream_class(file)
        self.sink = sink

        try:
            self.parse_rcs_admin()

            # let sink know when the admin section has been completed
            self.sink.admin_completed()

            self.parse_rcs_tree()

            # many sinks want to know when the tree has been completed so
            # they can do some work to prep for the arrival of the deltatext
            self.sink.tree_completed()

            self.parse_rcs_description()
            self.parse_rcs_deltatext()

            # easiest for us to tell the sink it is done, rather than worry
            # about higher level software doing it.
            self.sink.parse_completed()
        finally:
            # Drop the references even when parsing is cut short by an
            # exception, so the parser does not keep the file and the sink
            # alive.
            self.ts = self.sink = None
481 # --------------------------------------------------------------------------