1 #!/usr/bin/env python -u
2 # (Be in -*- python -*- mode.)
4 # ====================================================================
5 # Copyright (c) 2009-2010 CollabNet. All rights reserved.
7 # This software is licensed as described in the file COPYING, which
8 # you should have received as part of this distribution. The terms
9 # are also available at http://subversion.tigris.org/license-1.html.
10 # If newer versions of this license are posted there, you may use a
11 # newer version instead, at your option.
13 # This software consists of voluntary contributions made by many
14 # individuals. For exact contribution history, see the revision
15 # history and logs, available at http://cvs2svn.tigris.org/.
16 # ====================================================================
18 """Generate git blobs directly from RCS files.
20 Usage: generate_blobs.py BLOBFILE
22 To standard input should be written a series of pickles, each of which
23 contains the following tuple:
25 (RCSFILE, {CVS_REV : MARK, ...})
27 indicating which RCS file to read, which CVS revisions should be
28 written to the blob file, and which marks to give each of the blobs.
30 Since the tuples are read from stdin, either the calling program has
31 to write to this program's stdin in binary mode and ensure that this
32 program's standard input is opened in binary mode (e.g., using
33 Python's '-u' option) or both can be in text mode *provided* that
34 pickle protocol 0 is used.
36 The program does most of its work in RAM, keeping at most one revision
37 fulltext and one revision deltatext (plus perhaps one or two copies as
38 scratch space) in memory at a time. But there are times when the
39 fulltext of a revision is needed multiple times, for example when
40 multiple branches sprout from the revision. In these cases, the
41 fulltext is written to disk. If the fulltext is also needed for the
42 blobfile, then the copy in the blobfile is read again when it is
43 needed. If the fulltext is not needed in the blobfile, then it is
44 written to a temporary file created with Python's tempfile module."""
49 import cPickle
as pickle
51 sys
.path
.insert(0, os
.path
.dirname(os
.path
.dirname(sys
.argv
[0])))
53 from cvs2svn_rcsparse
import Sink
54 from cvs2svn_rcsparse
import parse
55 from cvs2svn_lib
.rcs_stream
import RCSStream
59 # A map from CVS revision number (e.g., 1.2.3.4) to mark:
62 [rev
, mark
] = l
.strip().split()
68 class RevRecord(object):
69 def __init__(self
, rev
, mark
=None):
73 # The rev whose fulltext is the base for this one's delta.
76 # Other revs that refer to this one as their base text:
79 # The (f, offset, length) where the fulltext of this revision can
84 return bool(self
.mark
is not None or self
.refs
)
87 return self
.fulltext
is not None
89 def write_blob(self
, f
, text
):
93 f
.write('mark :%s\n' % (self
.mark
,))
94 f
.write('data %d\n' % (length
,))
99 self
.fulltext
= (f
, offset
, length
)
101 # This record (with its mark) has now been written, so the mark is
102 # no longer needed. Setting it to None might allow is_needed() to
106 def write(self
, f
, text
):
111 self
.fulltext
= (f
, offset
, length
)
113 def read_fulltext(self
):
114 assert self
.fulltext
is not None
115 (f
, offset
, length
) = self
.fulltext
117 return f
.read(length
)
120 if self
.mark
is not None:
121 return '%s (%r): %r, %s' % (
122 self
.rev
, self
.mark
, self
.refs
, self
.fulltext
is not None,
125 return '%s: %r, %s' % (self
.rev
, self
.refs
, self
.fulltext
is not None)
128 class WriteBlobSink(Sink
):
129 def __init__(self
, blobfile
, marks
):
130 self
.blobfile
= blobfile
132 # A map {rev : RevRecord} for all of the revisions whose fulltext
133 # will still be needed:
136 # The revisions that need marks will definitely be needed, so
137 # create records for them now (the rest will be filled in while
138 # reading the RCS file):
139 for (rev
, mark
) in marks
.items():
140 self
.revrecs
[rev
] = RevRecord(rev
, mark
)
142 # The RevRecord of the last fulltext that has been reconstructed,
143 # if it still is_needed():
144 self
.last_revrec
= None
145 # An RCSStream holding the fulltext of last_revrec:
146 self
.last_rcsstream
= None
148 # A file to temporarily hold the fulltexts of revisions for which
149 # no blobs are needed:
150 self
.fulltext_file
= tempfile
.TemporaryFile()
152 def __getitem__(self
, rev
):
154 return self
.revrecs
[rev
]
156 revrec
= RevRecord(rev
)
157 self
.revrecs
[rev
] = revrec
160 def define_revision(self
, rev
, timestamp
, author
, state
, branches
, next
):
164 revrec
.refs
.add(next
)
166 revrec
.refs
.update(branches
)
168 for dependent_rev
in revrec
.refs
:
169 dependent_revrec
= self
[dependent_rev
]
170 assert dependent_revrec
.base
is None
171 dependent_revrec
.base
= rev
173 def tree_completed(self
):
174 """Remove unneeded RevRecords.
176 Remove the RevRecords for any revisions whose fulltext will not be
177 needed (neither as blob output nor as the base of another needed
180 revrecs_to_remove
= [
182 for revrec
in self
.revrecs
.itervalues()
183 if not revrec
.is_needed()
185 while revrecs_to_remove
:
186 revrec
= revrecs_to_remove
.pop()
187 del self
.revrecs
[revrec
.rev
]
188 base_revrec
= self
[revrec
.base
]
189 base_revrec
.refs
.remove(revrec
.rev
)
190 if not base_revrec
.is_needed():
191 revrecs_to_remove
.append(base_revrec
)
193 def set_revision_info(self
, rev
, log
, text
):
194 revrec
= self
.revrecs
.get(rev
)
199 base_rev
= revrec
.base
201 # This must be the last revision on trunk, for which the
202 # fulltext is stored directly in the RCS file:
203 assert self
.last_revrec
is None
204 if revrec
.mark
is not None:
205 revrec
.write_blob(self
.blobfile
, text
)
206 if revrec
.is_needed():
207 self
.last_revrec
= revrec
208 self
.last_rcsstream
= RCSStream(text
)
209 elif self
.last_revrec
is not None and base_rev
== self
.last_revrec
.rev
:
210 # Our base revision is stored in self.last_rcsstream.
211 self
.last_revrec
.refs
.remove(rev
)
212 if self
.last_revrec
.is_needed():
213 if not self
.last_revrec
.is_written():
214 self
.last_revrec
.write(
215 self
.fulltext_file
, self
.last_rcsstream
.get_text()
217 self
.last_rcsstream
.apply_diff(text
)
218 if revrec
.mark
is not None:
219 revrec
.write_blob(self
.blobfile
, self
.last_rcsstream
.get_text())
220 if revrec
.is_needed():
221 self
.last_revrec
= revrec
223 self
.last_revrec
= None
224 self
.last_rcsstream
= None
226 # Our base revision is not stored in self.last_rcsstream; it
227 # will have to be obtained from elsewhere.
229 # Store the old last_rcsstream if necessary:
230 if self
.last_revrec
is not None:
231 if not self
.last_revrec
.is_written():
232 self
.last_revrec
.write(
233 self
.fulltext_file
, self
.last_rcsstream
.get_text()
235 self
.last_revrec
= None
236 self
.last_rcsstream
= None
238 base_revrec
= self
[base_rev
]
239 rcsstream
= RCSStream(base_revrec
.read_fulltext())
240 base_revrec
.refs
.remove(rev
)
241 rcsstream
.apply_diff(text
)
242 if revrec
.mark
is not None:
243 revrec
.write_blob(self
.blobfile
, rcsstream
.get_text())
244 if revrec
.is_needed():
245 self
.last_revrec
= revrec
246 self
.last_rcsstream
= rcsstream
def parse_completed(self):
    """Close the scratch file that holds fulltexts not written as blobs.

    self.fulltext_file is the tempfile.TemporaryFile() opened in
    __init__, so closing it also discards whatever was stored there.
    Presumably the RCS parser invokes this callback once it has finished
    with the file -- confirm against cvs2svn_rcsparse.Sink.
    """
    scratch_file = self.fulltext_file
    scratch_file.close()
254 [blobfilename
] = args
255 blobfile
= open(blobfilename
, 'w+b')
258 (rcsfile
, marks
) = pickle
.load(sys
.stdin
)
261 parse(open(rcsfile
, 'rb'), WriteBlobSink(blobfile
, marks
))
266 if __name__
== '__main__':