Updated to newer Python syntax where possible
[zeroinstall/zeroinstall-afb.git] / zeroinstall / zerostore / optimise.py
blob5a9d0234cafee2153b5ef3b866131849d4816fac
1 """Optimise the cache."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from zeroinstall import _
7 import os
8 from logging import warn
10 def _already_linked(a, b):
11 ai = os.stat(a)
12 bi = os.stat(b)
13 return (ai.st_dev, ai.st_ino) == (bi.st_dev, bi.st_ino)
15 def _byte_identical(a, b):
16 af = file(a, 'rb')
17 bf = file(b, 'rb')
18 while True:
19 adata = af.read(100)
20 bdata = bf.read(100)
21 if adata != bdata:
22 return False
23 if not adata:
24 return True
def _link(a, b, tmpfile):
    """Keep 'a', delete 'b' and hard-link to 'a'.

    The hard link is first created at 'tmpfile' and then renamed over 'b'.
    The directory containing 'b' is given owner write permission for the
    duration of the operation and its previous mode is restored afterwards,
    even if linking or renaming fails.

    @param a: path of the file to keep
    @param b: path of the duplicate that is replaced by a link to 'a'
    @param tmpfile: currently unused path at which the link is created
        before being renamed over 'b'"""
    if not _byte_identical(a, b):
        # NOTE(review): only a warning is logged here; 'b' is still replaced
        # by a link to 'a' below. Confirm that this is intended.
        warn(_("Files should be identical, but they're not!\n%(file_a)s\n%(file_b)s"), {'file_a': a, 'file_b': b})

    b_dir = os.path.dirname(b)
    old_mode = os.lstat(b_dir).st_mode
    os.chmod(b_dir, old_mode | 0o200)  # Need write access briefly
    try:
        os.link(a, tmpfile)
        try:
            os.rename(tmpfile, b)
        except BaseException:
            # Don't leave the temporary link behind if it could not be
            # moved into place; the original error is re-raised.
            os.unlink(tmpfile)
            raise
    finally:
        os.chmod(b_dir, old_mode)
def optimise(impl_dir):
    """Scan an implementation cache directory for duplicate files, and
    hard-link any duplicates together to save space.

    Each sub-directory's '.manifest' file is read line by line. Regular and
    executable file entries ('F' and 'X' lines) with the same type, digest,
    mtime and size as an entry seen earlier are replaced by a hard link to
    the first copy, unless they are already linked to it.

    Directory entries whose name starts with '.' or contains no '=' are
    skipped with a warning, as are implementations whose manifest cannot be
    opened. Implementations whose algorithm prefix is 'sha1' are skipped
    without a warning.

    @param impl_dir: a $cache/0install.net/implementations directory
    @type impl_dir: str
    @return: (unique bytes, duplicated bytes, already linked, manifest size)
    @rtype: (int, int, int, int)
    @raise Exception: if no unused temporary file name could be found"""
    import random

    # (type, digest, mtime, size) -> (impl, subdir, name) of the first copy
    first_copy = {}
    dup_size = uniq_size = already_linked = man_size = 0

    # Choose a scratch path inside impl_dir that does not exist yet; it is
    # handed to _link() for every duplicate. The loop variable must not be
    # called '_', which is the translation function used below.
    for _attempt in range(10):
        tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
        if not os.path.exists(tmpfile):
            break
    else:
        raise Exception(_("Can't generate unused tempfile name!"))

    for impl in os.listdir(impl_dir):
        if impl.startswith('.') or '=' not in impl:
            warn(_("Skipping non-implementation '%s'"), impl)
            continue
        manifest_path = os.path.join(impl_dir, impl, '.manifest')
        try:
            # Text mode, so that lines are 'str' on both Python 2 and 3.
            ms = open(manifest_path, 'r')
        except (OSError, IOError) as ex:
            # On Python 2 a failed open raises IOError, which is not an
            # OSError, so both are listed.
            warn(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"), {'manifest_path': manifest_path, 'exception': str(ex)})
            continue

        with ms:  # Closes the manifest on every exit path, including 'continue'
            alg = impl.split('=', 1)[0]
            if alg == 'sha1':
                continue

            man_size += os.path.getsize(manifest_path)

            subdir = ""
            for line in ms:
                if line[0] == 'D':
                    # "D /path": following entries live in this directory
                    itype, path = line.split(' ', 1)
                    assert path.startswith('/')
                    subdir = path[1:-1]  # Strip slash and newline
                    continue

                if line[0] == "S":
                    # "S digest size target": counted as unique, never linked
                    itype, digest, size, rest = line.split(' ', 3)
                    uniq_size += int(size)
                    continue

                assert line[0] in "FX"

                # "F digest mtime size name" (or 'X' for executables)
                itype, digest, mtime, size, path = line.split(' ', 4)
                path = path[:-1]  # Strip newline
                size = int(size)

                key = (itype, digest, mtime, size)
                loc_path = (impl, subdir, path)

                first_loc = first_copy.get(key)
                if first_loc is None:
                    first_copy[key] = loc_path
                    uniq_size += size
                    continue

                first_full = os.path.join(impl_dir, *first_loc)
                new_full = os.path.join(impl_dir, *loc_path)
                if _already_linked(first_full, new_full):
                    already_linked += size
                else:
                    _link(first_full, new_full, tmpfile)
                    dup_size += size
    return (uniq_size, dup_size, already_linked, man_size)