Re-update to rcsparse r2495 to get all the goodness of the new update script.
[cvs2svn.git] / cvs2svn_lib / cvs_item_database.py
blob6222d0ef5d242abd6e09530b4983c4e2cda35a80
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2008 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """This module contains a database that can store arbitrary CVSItems."""
20 import re
21 import cPickle
23 from cvs2svn_lib.cvs_item import CVSRevisionAdd
24 from cvs2svn_lib.cvs_item import CVSRevisionChange
25 from cvs2svn_lib.cvs_item import CVSRevisionDelete
26 from cvs2svn_lib.cvs_item import CVSRevisionNoop
27 from cvs2svn_lib.cvs_item import CVSBranch
28 from cvs2svn_lib.cvs_item import CVSBranchNoop
29 from cvs2svn_lib.cvs_item import CVSTag
30 from cvs2svn_lib.cvs_item import CVSTagNoop
31 from cvs2svn_lib.cvs_file_items import CVSFileItems
32 from cvs2svn_lib.serializer import Serializer
33 from cvs2svn_lib.serializer import PrimedPickleSerializer
34 from cvs2svn_lib.indexed_database import IndexedStore
# The CVSItem classes handed to every PrimedPickleSerializer that this
# module constructs.
cvs_item_primer = (
    CVSRevisionAdd,
    CVSRevisionChange,
    CVSRevisionDelete,
    CVSRevisionNoop,
    CVSBranch,
    CVSBranchNoop,
    CVSTag,
    CVSTagNoop,
)
class NewCVSItemStore:
    """Writer for a file of sequential CVSItems, grouped by CVSFile.

    The file is a sequence of pickles.  The zeroth pickle is a
    Serializer as described in the serializer module.  Each later
    pickle is a list of CVSItems holding all of the CVSItems for one
    file.

    A single pickler is not shared between all items because its memo
    would grow too large."""

    def __init__(self, filename):
        """Create FILENAME and write the primed serializer as the first pickle."""

        self.f = open(filename, 'wb')

        primer = cvs_item_primer + (CVSFileItems,)
        self.serializer = PrimedPickleSerializer(primer)

        # Protocol -1 asks pickle for the highest protocol it supports.
        cPickle.dump(self.serializer, self.f, -1)

    def add(self, cvs_file_items):
        """Write CVS_FILE_ITEMS into the database through the serializer."""

        out = self.f
        self.serializer.dumpf(out, cvs_file_items)

    def close(self):
        """Close the underlying file and forget the reference to it."""

        handle = self.f
        handle.close()
        self.f = None
class OldCVSItemStore:
    """Reader for a file created by NewCVSItemStore.

    The file can only be consumed sequentially, one CVSFileItems
    instance at a time."""

    def __init__(self, filename):
        """Open FILENAME and load the serializer stored as its first pickle."""

        self.f = open(filename, 'rb')

        # The zeroth pickle holds the serializer (and with it the memo)
        # needed to decode everything that follows:
        self.serializer = cPickle.load(self.f)

    def iter_cvs_file_items(self):
        """Generate the stored CVSFileItems instances, one file at a time.

        Each value yielded is what the serializer loads next from the
        file.  Iteration stops as soon as EOFError is raised."""

        try:
            while True:
                cvs_file_items = self.serializer.loadf(self.f)
                yield cvs_file_items
        except EOFError:
            pass

    def close(self):
        """Close the underlying file and forget the reference to it."""

        handle = self.f
        handle.close()
        self.f = None
class LinewiseSerializer(Serializer):
    """A serializer that emits exactly one line for each object.

    The real serialization work is delegated to a wrapped serializer;
    this class only escapes any newlines in the serialized data and
    then appends a single newline."""

    def __init__(self, wrapee):
        """Wrap WRAPEE, the serializer that does the actual work."""

        self.wrapee = wrapee

    @staticmethod
    def _encode_newlines(s):
        r"""Return S with newlines and backslashes encoded.

        The following character transformations are applied:

            LF -> \n
            CR -> \r
            ^Z -> \z   (needed for Windows)
            \  -> \\

        """

        # Backslashes are doubled first so that the backslashes introduced
        # by the later substitutions are not themselves escaped.
        encoded = s.replace('\\', '\\\\')
        encoded = encoded.replace('\n', '\\n')
        encoded = encoded.replace('\r', '\\r')
        encoded = encoded.replace('\x1a', '\\z')
        return encoded

    _escape_re = re.compile(r'(\\\\|\\n|\\r|\\z)')
    _subst = {'\\n' : '\n', '\\r' : '\r', '\\z' : '\x1a', '\\\\' : '\\'}

    @staticmethod
    def _decode_newlines(s):
        """Return S with newlines and backslashes decoded.

        This is the inverse of _encode_newlines()."""

        table = LinewiseSerializer._subst
        return LinewiseSerializer._escape_re.sub(
            lambda match: table[match.group(1)], s
        )

    def dumpf(self, f, object):
        """Write the one-line encoding of OBJECT to F."""

        f.write(self.dumps(object))

    def dumps(self, object):
        """Return OBJECT serialized by the wrapee, escaped, plus a newline."""

        inner = self.wrapee.dumps(object)
        return self._encode_newlines(inner) + '\n'

    def loadf(self, f):
        """Read one line from F and return the object decoded from it."""

        line = f.readline()
        return self.loads(line)

    def loads(self, s):
        """Return the object decoded from S, a line as produced by dumps()."""

        # The last character is dropped unconditionally; dumps() always
        # terminates its output with a newline.
        payload = self._decode_newlines(s[:-1])
        return self.wrapee.loads(payload)
class NewSortableCVSRevisionDatabase(object):
    """A serially-accessible, sortable file for holding CVSRevisions.

    This is the writing side: it creates such files."""

    def __init__(self, filename, serializer):
        """Open FILENAME for writing and wrap SERIALIZER to emit one line each."""

        self.f = open(filename, 'w')
        self.serializer = LinewiseSerializer(serializer)

    def add(self, cvs_rev):
        """Append one line for CVS_REV.

        The line consists of the metadata id in hex, the timestamp as
        zero-padded 8-digit hex, and the serialized revision, separated
        by single spaces."""

        record = '%x %08x %s' % (
            cvs_rev.metadata_id,
            cvs_rev.timestamp,
            self.serializer.dumps(cvs_rev),
        )
        self.f.write(record)

    def close(self):
        """Close the underlying file and forget the reference to it."""

        handle = self.f
        handle.close()
        self.f = None
class OldSortableCVSRevisionDatabase(object):
    """A serially-accessible, sortable file for holding CVSRevisions.

    This class reads such files."""

    def __init__(self, filename, serializer):
        """Remember FILENAME and wrap SERIALIZER in a LinewiseSerializer.

        The file is not opened here; each iteration opens it afresh."""

        self.filename = filename
        self.serializer = LinewiseSerializer(serializer)

    def __iter__(self):
        """Yield the object decoded from each line of the file, in file order.

        The file is closed when iteration finishes, when the generator is
        closed early, and when decoding a line raises."""

        f = open(self.filename, 'r')
        try:
            for l in f:
                # Each line starts with two space-separated fields; only
                # the remainder is the serialized record.
                s = l.split(' ', 2)[-1]
                yield self.serializer.loads(s)
        finally:
            # Runs on normal exhaustion, on generator close(), and on
            # errors, so the file handle is never leaked.
            f.close()

    def close(self):
        """Do nothing; the file is opened and closed inside __iter__()."""

        pass
class NewSortableCVSSymbolDatabase(object):
    """A serially-accessible, sortable file for holding CVSSymbols.

    This is the writing side: it creates such files."""

    def __init__(self, filename, serializer):
        """Open FILENAME for writing and wrap SERIALIZER to emit one line each."""

        self.f = open(filename, 'w')
        self.serializer = LinewiseSerializer(serializer)

    def add(self, cvs_symbol):
        """Append one line for CVS_SYMBOL.

        The line consists of the id of the symbol in hex, a space, and
        the serialized CVS_SYMBOL."""

        symbol_id = cvs_symbol.symbol.id
        record = '%x %s' % (symbol_id, self.serializer.dumps(cvs_symbol))
        self.f.write(record)

    def close(self):
        """Close the underlying file and forget the reference to it."""

        handle = self.f
        handle.close()
        self.f = None
class OldSortableCVSSymbolDatabase(object):
    """A serially-accessible, sortable file for holding CVSSymbols.

    This class reads such files."""

    def __init__(self, filename, serializer):
        """Remember FILENAME and wrap SERIALIZER in a LinewiseSerializer.

        The file is not opened here; each iteration opens it afresh."""

        self.filename = filename
        self.serializer = LinewiseSerializer(serializer)

    def __iter__(self):
        """Yield the object decoded from each line of the file, in file order.

        The file is closed when iteration finishes, when the generator is
        closed early, and when decoding a line raises."""

        f = open(self.filename, 'r')
        try:
            for l in f:
                # Each line starts with one space-terminated field; only
                # the remainder is the serialized record.
                s = l.split(' ', 1)[-1]
                yield self.serializer.loads(s)
        finally:
            # Runs on normal exhaustion, on generator close(), and on
            # errors, so the file handle is never leaked.
            f.close()

    def close(self):
        """Do nothing; the file is opened and closed inside __iter__()."""

        pass
def IndexedCVSItemStore(filename, index_filename, mode):
    """Return an IndexedStore built from FILENAME, INDEX_FILENAME and MODE.

    The store is given a PrimedPickleSerializer primed with
    cvs_item_primer."""

    serializer = PrimedPickleSerializer(cvs_item_primer)
    return IndexedStore(filename, index_filename, mode, serializer)