Invert logic to simplify if...else.
[cvs2svn.git] / cvs2svn_lib / sort.py
blob8a8911c16297f0b0e081475f3735f41b3331ed79
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Functions to sort large files.
19 The functions in this module were originally downloaded from the
20 following URL:
22 http://code.activestate.com/recipes/466302/
24 It was apparently submitted by Nicolas Lehuen on Tue, 17 Jan 2006.
25 According to the terms of service of that website, the code is usable
26 under the MIT license.
28 """
31 import os
32 import heapq
33 import itertools
34 import tempfile
def _pull(iterator):
    """Return a 1-tuple holding the next item of ITERATOR, or () if exhausted.

    A for loop is used rather than iterator.next() or next(iterator) so
    that the same code runs unchanged on old and new Python versions.
    """
    for item in iterator:
        return (item,)
    return ()


def _retire_chunk(chunks, chunk):
    """Best-effort disposal of an exhausted CHUNK.

    Closes the chunk, deletes the file named by chunk.name, and drops the
    chunk from CHUNKS.  The steps run in that order and stop at the first
    failure; ordinary errors are ignored so that plain iterables (which
    have no close() or name) can be merged too.  KeyboardInterrupt and
    SystemExit are deliberately not swallowed.
    """
    try:
        chunk.close()
        os.remove(chunk.name)
        chunks.remove(chunk)
    except Exception:
        pass


def merge(chunks, key=None):
    """Yield the items of all CHUNKS in a single sorted sequence.

    CHUNKS is a list of iterables, each of which must already be sorted
    according to KEY.  KEY is a one-argument function computing the sort
    key of an item; if it is None, items are compared directly.  Items
    with equal keys are yielded in order of their chunk's position in
    CHUNKS.

    As each chunk is exhausted it is closed, the file chunk.name is
    removed, and the chunk is removed from CHUNKS (all best-effort), so
    CHUNKS is modified in place while the generator runs.
    """
    if key is None:
        key = lambda x: x

    heap = []

    # Iterate over a snapshot: retiring an empty chunk removes it from
    # CHUNKS, and mutating the list being iterated would silently skip
    # the chunk that follows it.
    for index, chunk in enumerate(list(chunks)):
        iterator = iter(chunk)
        head = _pull(iterator)
        if head:
            value = head[0]
            # INDEX is unique per chunk, so tuple comparison never goes
            # past it to the (possibly uncomparable) remaining fields.
            heapq.heappush(heap, (key(value), index, value, iterator, chunk))
        else:
            _retire_chunk(chunks, chunk)

    while heap:
        _, index, value, iterator, chunk = heapq.heappop(heap)
        yield value
        head = _pull(iterator)
        if head:
            value = head[0]
            heapq.heappush(heap, (key(value), index, value, iterator, chunk))
        else:
            _retire_chunk(chunks, chunk)
def _discard_chunk_file(chunk):
    """Best-effort close of CHUNK and removal of the file chunk.name."""
    try:
        chunk.close()
        os.remove(chunk.name)
    except Exception:
        pass


def _write_sorted_chunk(lines, tempdir, index):
    """Write LINES to a new temporary file in TEMPDIR and return it.

    The returned file object is open in 'w+b' mode and rewound to the
    start, ready to be read back.  INDEX is only used to build the
    filename prefix.  If anything fails, the temporary file is removed
    and the exception is re-raised.
    """
    (fd, filename) = tempfile.mkstemp(
        '', 'sort%06i' % (index,), tempdir, False
        )
    chunk_file = None
    try:
        # Reopen by name instead of wrapping FD with os.fdopen(): the
        # cleanup code deletes chunks via chunk.name, which must therefore
        # be the real path.
        os.close(fd)
        chunk_file = open(filename, 'w+b', 64*1024)
        chunk_file.writelines(lines)
        chunk_file.flush()
        chunk_file.seek(0)
    except:
        # Clean up, then let the original error propagate.
        if chunk_file is not None:
            _discard_chunk_file(chunk_file)
        else:
            try:
                os.remove(filename)
            except OSError:
                pass
        raise
    return chunk_file


def sort_file(input, output, key=None, buffer_size=32000, tempdirs=[]):
    """Sort the lines of the file INPUT and write them to the file OUTPUT.

    INPUT and OUTPUT are filenames; both files are opened in binary mode.
    INPUT is read completely and closed before OUTPUT is opened.

    KEY is a one-argument function computing the sort key of a line; if
    it is None, lines are compared directly.

    At most BUFFER_SIZE lines are held in memory at once.  Each batch of
    lines is sorted and written to a temporary file, and the temporary
    files are then merged into OUTPUT using merge().

    TEMPDIRS is a sequence of directories in which the temporary files
    are created, used in round-robin order.  If it is empty, the
    directory returned by tempfile.gettempdir() is used.  (The default
    list is never mutated, only rebound.)

    All temporary files are removed before returning, whether the sort
    succeeds or fails.  Any exception raised while reading, sorting,
    writing or merging is propagated to the caller.
    """
    if not tempdirs:
        tempdirs = [tempfile.gettempdir()]

    chunks = []
    try:
        input_file = open(input, 'rb', 64*1024)
        try:
            input_iterator = iter(input_file)
            for tempdir in itertools.cycle(tempdirs):
                current_chunk = list(
                    itertools.islice(input_iterator, buffer_size)
                    )
                if not current_chunk:
                    break
                current_chunk.sort(key=key)
                chunks.append(
                    _write_sorted_chunk(current_chunk, tempdir, len(chunks))
                    )
        finally:
            input_file.close()

        output_file = open(output, 'wb', 64*1024)
        try:
            output_file.writelines(merge(chunks, key))
        finally:
            output_file.close()
    finally:
        # merge() removes chunks from the list as it exhausts them, so
        # whatever is left here was not consumed (error or early exit).
        for chunk in chunks:
            _discard_chunk_file(chunk)