Delete the chunk files in one finally block at the end of the function.
[cvs2svn.git] / cvs2svn_lib / sort.py
blobd8013f4b5362dcb9046c53445872be4bf930a394
1 # (Be in -*- python -*- mode.)
3 # ====================================================================
4 # Copyright (c) 2000-2009 CollabNet. All rights reserved.
6 # This software is licensed as described in the file COPYING, which
7 # you should have received as part of this distribution. The terms
8 # are also available at http://subversion.tigris.org/license-1.html.
9 # If newer versions of this license are posted there, you may use a
10 # newer version instead, at your option.
12 # This software consists of voluntary contributions made by many
13 # individuals. For exact contribution history, see the revision
14 # history and logs, available at http://cvs2svn.tigris.org/.
15 # ====================================================================
17 """Functions to sort large files.
19 The functions in this module were originally downloaded from the
20 following URL:
22 http://code.activestate.com/recipes/466302/
24 It was apparently submitted by Nicolas Lehuen on Tue, 17 Jan 2006.
25 According to the terms of service of that website, the code is usable
26 under the MIT license.
28 """
31 import os
32 import heapq
33 import itertools
34 import tempfile
def _discard_chunk(chunks, chunk):
    """Best-effort disposal of the exhausted CHUNK.

    Close CHUNK, delete the file named by its 'name' attribute, and
    remove it from the CHUNKS container.  The steps run in that order
    and the first one that fails ends the cleanup silently.  This keeps
    merge() usable with chunks that are not backed by files (for example
    plain lists, which have no close() method) and with CHUNKS
    containers that do not support remove().
    """
    try:
        chunk.close()
        os.remove(chunk.name)
        chunks.remove(chunk)
    except (AttributeError, TypeError, ValueError, EnvironmentError):
        # AttributeError: CHUNK is not file-like, or CHUNKS has no remove().
        # TypeError: chunk.name is not usable as a path.
        # ValueError: CHUNK was already removed from CHUNKS.
        # EnvironmentError: the file could not be closed or deleted.
        pass


def merge(chunks, key=None):
    """Generate the items of several sorted chunks in one sorted stream.

    CHUNKS is an iterable of iterables, each of which must already be
    sorted according to KEY.  KEY is a function of one argument that
    returns the sort key of an item; if it is None, the items
    themselves are compared.

    Items are yielded in ascending key order.  Items with equal keys
    are yielded in the order of the chunks that they came from.

    As soon as a chunk is exhausted, an attempt is made to close it,
    delete the file named by its 'name' attribute, and remove it from
    CHUNKS (so CHUNKS may be modified in place).  That cleanup is
    best-effort; see _discard_chunk().
    """

    if key is None:
        key = lambda x: x

    # Heap of (sort key, chunk index, item, iterator, chunk).  The chunk
    # index is unique within the heap, so comparisons never get as far
    # as the item, the iterator or the chunk.
    heap = []

    # Iterate over a snapshot: _discard_chunk() may remove entries from
    # CHUNKS, and removing from a list while iterating over it would
    # silently skip the chunk that follows an empty one.
    for index, chunk in enumerate(list(chunks)):
        iterator = iter(chunk)
        try:
            # The next() builtin works on Python 2.6+ and Python 3; the
            # iterator.next() method exists only on Python 2.
            value = next(iterator)
        except StopIteration:
            _discard_chunk(chunks, chunk)
        else:
            heapq.heappush(heap, (key(value), index, value, iterator, chunk))

    while heap:
        _sort_key, index, value, iterator, chunk = heapq.heappop(heap)
        yield value
        try:
            value = next(iterator)
        except StopIteration:
            _discard_chunk(chunks, chunk)
        else:
            heapq.heappush(heap, (key(value), index, value, iterator, chunk))
def sort_file(input, output, key=None, buffer_size=32000, tempdirs=None):
    """Sort the lines of the file INPUT and write them to the file OUTPUT.

    INPUT and OUTPUT are filenames; both files are opened in binary
    mode.  KEY is a function of one argument that returns the sort key
    of a line; if it is None, the lines themselves are compared.

    The input is read in chunks of at most BUFFER_SIZE lines.  Each
    chunk is sorted in memory and written to its own temporary file;
    the temporary files are then combined into OUTPUT using merge().

    TEMPDIRS is a sequence of directories in which to create the
    temporary files; the directories are used in rotation.  If it is
    empty or None, the directory returned by tempfile.gettempdir() is
    used.

    All temporary files that are still listed when the function ends
    are closed and deleted, whether or not the sort succeeded.
    """

    if not tempdirs:
        tempdirs = [tempfile.gettempdir()]

    chunks = []
    try:
        # open() rather than the Python-2-only file() builtin.
        input_file = open(input, 'rb', 64 * 1024)
        try:
            input_iterator = iter(input_file)

            for tempdir in itertools.cycle(tempdirs):
                current_chunk = list(
                    itertools.islice(input_iterator, buffer_size)
                    )
                if not current_chunk:
                    break
                current_chunk.sort(key=key)
                (fd, filename) = tempfile.mkstemp(
                    suffix='',
                    prefix='sort%06i' % (len(chunks),),
                    dir=tempdir,
                    text=False,
                    )
                # Reopen the file by name so that the resulting file
                # object has a 'name' attribute usable for deleting it.
                os.close(fd)
                output_chunk = open(filename, 'w+b', 64 * 1024)
                chunks.append(output_chunk)
                output_chunk.writelines(current_chunk)
                output_chunk.flush()
                # Rewind so that the chunk can be read back by merge().
                output_chunk.seek(0)
        finally:
            input_file.close()

        output_file = open(output, 'wb', 64 * 1024)
        try:
            output_file.writelines(merge(chunks, key))
        finally:
            output_file.close()
    finally:
        # Best-effort cleanup of whatever chunks remain.  Closing and
        # deleting are attempted independently so that a failed close()
        # does not leave the temporary file behind.
        for chunk in chunks:
            try:
                chunk.close()
            except EnvironmentError:
                pass
            try:
                os.remove(chunk.name)
            except EnvironmentError:
                # Typically the file has already been deleted.
                pass