Update rcsparse from upstream, and improve the mechanism for doing so.
[cvs2svn.git] / rcsparse / common.py
blobf27c04cb6b05cbf8756d4918117fa24c3164b97d
2 # Copyright (C) 2000-2002 The ViewCVS Group. All Rights Reserved.
4 # By using this file, you agree to the terms and conditions set forth in
5 # the LICENSE.html file which can be found at the top level of the ViewCVS
6 # distribution or at http://viewcvs.sourceforge.net/license-1.html.
8 # Contact information:
9 # Greg Stein, PO Box 760, Palo Alto, CA, 94302
10 # gstein@lyra.org, http://viewcvs.sourceforge.net/
12 # -----------------------------------------------------------------------
14 # This software is being maintained as part of the ViewCVS project.
15 # Information is available at:
16 # http://viewcvs.sourceforge.net/
18 # -----------------------------------------------------------------------
20 """common.py: common classes and functions for the RCS parsing tools."""
22 import time
23 import string
25 ### compat isn't in vclib right now. need to work up a solution
26 import compat
class Sink:
    """Receiver of parse events; every callback here is a no-op.

    The parser in this module invokes these methods as it recognises the
    corresponding parts of an RCS file.  Subclasses override the ones
    they care about; each default implementation returns None.
    """

    def set_head_revision(self, revision):
        """Receive the revision given by the 'head' admin phrase."""

    def set_principal_branch(self, branch_name):
        """Receive the branch given by the 'branch' admin phrase."""

    def define_tag(self, name, revision):
        """Receive one name/revision pair from the 'symbols' phrase."""

    def set_access(self, accessors):
        """Receive the list of tokens from a non-empty 'access' phrase."""

    def set_expansion(self, mode):
        """Receive the value of the 'expand' admin phrase."""

    def set_locking(self, mode):
        """Used to signal locking mode.

        Called with mode argument 'strict' if strict locking
        Not called when no locking used."""

    def set_locker(self, revision, locker):
        """Receive one lock entry: the locked revision and its locker."""

    def set_comment(self, comment):
        """Receive the value of the 'comment' admin phrase."""

    def set_description(self, description):
        """Receive the token that follows 'desc'."""

    def define_revision(self, revision, timestamp, author, state,
                        branches, next):
        """Receive the metadata of one entry of the revision tree.

        'branches' is a list of tokens and 'next' is None when the entry
        names no following revision.
        """

    def set_revision_info(self, revision, log, text):
        """Receive the log message and text recorded for a revision."""

    def admin_completed(self):
        """Signal that the admin section has been parsed."""

    def tree_completed(self):
        """Signal that the revision tree has been parsed."""

    def parse_completed(self):
        """Signal that the whole file has been parsed."""
65 # --------------------------------------------------------------------------
67 # EXCEPTIONS USED BY RCSPARSE
class RCSParseError(Exception):
    """Base class of the parse errors defined in this module."""
class RCSIllegalCharacter(RCSParseError):
    """Parse error subclass; adds nothing to RCSParseError.

    NOTE(review): presumably raised by a token stream on a character
    that is not valid in an RCS file -- confirm against the stream
    implementations, which are not defined alongside this class.
    """
74 ### need more work on this one
class RCSExpected(RCSParseError):
    """Parse error raised when a different token was required.

    Both values are handed to the base class constructor, so they are
    available as the exception's arguments: first the token that was
    read, then the token that was wanted.
    """

    def __init__(self, got, wanted):
        """Record the token that was read and the one that was wanted."""
        RCSParseError.__init__(self, got, wanted)
class RCSStopParser(Exception):
    """Exception that is deliberately not an RCSParseError.

    NOTE(review): presumably meant to be raised by a sink to stop a
    parse early -- confirm against the callers; it is neither raised nor
    caught by the parser class that follows.
    """
82 # --------------------------------------------------------------------------
84 # STANDARD TOKEN STREAM-BASED PARSER
class _Parser:
    """Token-stream based RCS parser that reports what it finds to a sink.

    Subclasses set 'stream_class' to a token stream class.  The stream is
    built from the file passed to parse() and must provide:

      get()        -- next token, or None at end of input
      unget(token) -- push a token back
      mget(n)      -- next n tokens, unpacked here in reverse reading
                      order (last token read comes first)
      match(token) -- consume a token that is required to equal 'token'

    Running out of input in the admin section or the revision tree
    raises RuntimeError("Unexpected EOF").
    """

    stream_class = None  # subclasses need to define this

    # -- helpers ----------------------------------------------------------

    def _next_token(self):
        """Return the next token; raise RuntimeError at end of input."""
        token = self.ts.get()
        if token is None:
            raise RuntimeError("Unexpected EOF")
        return token

    def _tokens_until_semicolon(self):
        """Return the tokens up to the next ';', which is consumed."""
        tokens = []
        while 1:
            token = self._next_token()
            if token == ';':
                return tokens
            tokens.append(token)

    def _skip_past_semicolon(self):
        """Discard tokens up to and including the next ';'."""
        while self._next_token() != ';':
            pass

    def _starts_with_digit(self, token):
        """Tell whether 'token' is non-empty and begins with 0-9."""
        first = token[:1]
        # The empty string is "in" every string, so test for it explicitly.
        return first != '' and first in string.digits

    def _parse_optional_value(self, notify):
        """Parse '<value>;' or a bare ';' following an admin keyword.

        rcsfile(5) allows the value of 'head', 'branch', 'comment' and
        'expand' to be absent.  'notify' is called with the value only
        when there is one.
        """
        value = self._next_token()
        if value == ';':
            return
        notify(value)
        semi = self._next_token()
        if semi != ';':
            raise RCSExpected(semi, ';')

    def _date_fields(self, date):
        """Turn a dotted RCS date into the tuple handed to timegm.

        Three zero fields are appended to the parsed numbers.  A year
        below 1970 is widened: below 70 it has 2000 added, otherwise
        1900.  Raises ValueError('invalid year') if the result is still
        below 1970.
        """
        fields = [int(part) for part in date.split('.')] + [0, 0, 0]
        # need to make the date four digits for timegm
        EPOCH = 1970
        if fields[0] < EPOCH:
            if fields[0] < 70:
                fields[0] = fields[0] + 2000
            else:
                fields[0] = fields[0] + 1900
            if fields[0] < EPOCH:
                raise ValueError('invalid year')
        return tuple(fields)

    # -- sections of the RCS file -----------------------------------------

    def parse_rcs_admin(self):
        """Parse the admin section and report each phrase to the sink.

        Returns with the stream positioned on the first token of the
        revision tree (a token starting with a digit) or on 'desc'.
        Raises RuntimeError at end of input and RCSExpected when a ';'
        is missing after a value.
        """
        while 1:
            # Read initial token at beginning of line
            token = self._next_token()

            # We're done once we reach the description of the RCS tree
            # (or 'desc' directly, when the tree is empty).
            if token == 'desc' or self._starts_with_digit(token):
                self.ts.unget(token)
                return

            if token == "head":
                self._parse_optional_value(self.sink.set_head_revision)
            elif token == "branch":
                self._parse_optional_value(self.sink.set_principal_branch)
            elif token == "symbols":
                while 1:
                    tag_name = self._next_token()
                    if tag_name == ';':
                        break
                    self.ts.match(':')
                    tag_rev = self._next_token()
                    self.sink.define_tag(tag_name, tag_rev)
            elif token == "comment":
                self._parse_optional_value(self.sink.set_comment)
            elif token == "expand":
                self._parse_optional_value(self.sink.set_expansion)
            elif token == "locks":
                while 1:
                    tag = self._next_token()
                    if tag == ';':
                        break
                    if ':' in tag:
                        # 'locker:rev' delivered as a single token
                        (locker, rev) = tag.split(':')
                    else:
                        # ':' delivered as its own token, as 'symbols'
                        # above expects
                        locker = tag
                        self.ts.match(':')
                        rev = self._next_token()
                    self.sink.set_locker(rev, locker)

                tag = self.ts.get()
                if tag == "strict":
                    self.sink.set_locking("strict")
                    self.ts.match(';')
                elif tag is not None:
                    self.ts.unget(tag)
            elif token == "access":
                accessors = self._tokens_until_semicolon()
                if accessors:
                    self.sink.set_access(accessors)
            elif token != ';':
                # Chew up "newphrase": skip the whole phrase so that none
                # of its values is mistaken for a keyword or a revision.
                # warn("Unexpected RCS token: $token\n")
                self._skip_past_semicolon()

    def parse_rcs_tree(self):
        """Parse the revision tree, calling sink.define_revision per entry.

        Returns with the stream positioned on 'desc'.  Raises
        RuntimeError at end of input, RCSExpected on a missing keyword or
        ';', and ValueError for an unusable year.
        """
        while 1:
            revision = self._next_token()

            # End of RCS tree description ?
            if revision == 'desc':
                self.ts.unget(revision)
                return

            # Parse date
            semi, date, sym = self.ts.mget(3)
            if sym != 'date':
                raise RCSExpected(sym, 'date')
            if semi != ';':
                raise RCSExpected(semi, ';')

            # Convert date into timestamp
            timestamp = compat.timegm(self._date_fields(date))

            # Parse author
            ### NOTE: authors containing whitespace are violations of the
            ### RCS specification.  We are making an allowance here because
            ### CVSNT is known to produce these sorts of authors.
            self.ts.match('author')
            author = ' '.join(self._tokens_until_semicolon())

            # Parse state
            self.ts.match('state')
            state = ' '.join(self._tokens_until_semicolon())

            # Parse branches
            self.ts.match('branches')
            branches = self._tokens_until_semicolon()

            # Parse revision of next delta in chain
            next_rev, sym = self.ts.mget(2)
            if sym != 'next':
                raise RCSExpected(sym, 'next')
            if next_rev == ';':
                next_rev = None
            else:
                self.ts.match(';')

            # there are some files with extra tags in them. for example:
            #    owner  640;
            #    group  15;
            #    permissions    644;
            #    hardlinks      @configure.in@;
            # this is "newphrase" in RCSFILE(5). we just want to skip over
            # these.
            while 1:
                token = self._next_token()
                if token == 'desc' or self._starts_with_digit(token):
                    self.ts.unget(token)
                    break
                # consume everything up to the semicolon
                self._skip_past_semicolon()

            self.sink.define_revision(revision, timestamp, author, state,
                                      branches, next_rev)

    def parse_rcs_description(self):
        """Consume 'desc' and pass the following token to the sink."""
        self.ts.match('desc')
        self.sink.set_description(self.ts.get())

    def parse_rcs_deltatext(self):
        """Parse '<revision> log <log> text <text>' groups until EOF.

        Raises RCSExpected when the 'log' or 'text' keyword is not where
        it should be.
        """
        while 1:
            revision = self.ts.get()
            if revision is None:
                # EOF
                break
            text, sym2, log, sym1 = self.ts.mget(4)
            if sym1 != 'log':
                raise RCSExpected(sym1, 'log')
            if sym2 != 'text':
                raise RCSExpected(sym2, 'text')
            ### need to add code to chew up "newphrase"
            self.sink.set_revision_info(revision, log, text)

    def parse(self, file, sink):
        """Parse 'file' completely, reporting everything to 'sink'.

        The token stream and the sink are dropped again when parsing
        ends, whether it finished or an exception is propagating.
        """
        self.ts = self.stream_class(file)
        self.sink = sink
        try:
            self.parse_rcs_admin()

            # let sink know when the admin section has been completed
            self.sink.admin_completed()

            self.parse_rcs_tree()

            # many sinks want to know when the tree has been completed so
            # they can do some work to prep for the arrival of the
            # deltatext
            self.sink.tree_completed()

            self.parse_rcs_description()
            self.parse_rcs_deltatext()

            # easiest for us to tell the sink it is done, rather than
            # worry about higher level software doing it.
            self.sink.parse_completed()
        finally:
            self.ts = self.sink = None
293 # --------------------------------------------------------------------------