1 """Optimise the cache."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
6 from __future__ import print_function
8 from zeroinstall import _
9 import os, sys
10 from logging import warn

def _already_linked(a, b):
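	"""Return True iff 'a' and 'b' are already hard links to the same inode."""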
	ai = os.stat(a)
	bi = os.stat(b)
	return (ai.st_dev, ai.st_ino) == (bi.st_dev, bi.st_ino)

def _byte_identical(a, b):
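	"""Return True iff files 'a' and 'b' have identical contents."""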
	with open(a, 'rb') as af:
		with open(b, 'rb') as bf:
			while True:
				adata = af.read(100)
				bdata = bf.read(100)
				if adata != bdata:
					return False
				if not adata:
					return True

def _link(a, b, tmpfile):
	"""Keep 'a', delete 'b' and hard-link to 'a'"""
	if not _byte_identical(a, b):
		warn(_("Files should be identical, but they're not!\n%(file_a)s\n%(file_b)s"), {'file_a': a, 'file_b': b})
	b_dir = os.path.dirname(b)
	old_mode = os.lstat(b_dir).st_mode
	os.chmod(b_dir, old_mode | 0o200)	# Need write access briefly
	try:
		os.link(a, tmpfile)
		try:
			os.rename(tmpfile, b)
		except:
			os.unlink(tmpfile)
			raise
	finally:
		os.chmod(b_dir, old_mode)

def optimise(impl_dir):
47 """Scan an implementation cache directory for duplicate files, and
48 hard-link any duplicates together to save space.
49 @param impl_dir: a $cache/0install.net/implementations directory
50 @type impl_dir: str
51 @return: (unique bytes, duplicated bytes, already linked, manifest size)
52 @rtype: (int, int, int, int)"""
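
	# Illustrative usage only; the cache path below is an assumption taken
	# from the docstring above, not something this module defines:
	#   uniq, dup, linked, man = optimise(os.path.expanduser('~/.cache/0install.net/implementations'))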
	first_copy = {}		# TypeDigest -> Path
	dup_size = uniq_size = already_linked = man_size = 0

	import random
	from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair
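
	# Pick a scratch filename inside impl_dir that is not already in use;
	# give up after ten attempts rather than looping forever.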
	for x in range(10):
		tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
		if not os.path.exists(tmpfile):
			break
	else:
		raise Exception(_("Can't generate unused tempfile name!"))

	dirs = os.listdir(impl_dir)
	total = len(dirs)
	msg = ""
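	# Erase the current progress message by overwriting it with spaces.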
	def clear():
		print("\r" + (" " * len(msg)) + "\r", end='')
	for i, impl in enumerate(dirs):
		clear()
		msg = _("[%(done)d / %(total)d] Reading manifests...") % {'done': i, 'total': total}
		print(msg, end='')
		sys.stdout.flush()

		try:
			alg, manifest_digest = parse_algorithm_digest_pair(impl)
		except BadDigest:
			warn(_("Skipping non-implementation '%s'"), impl)
			continue
		manifest_path = os.path.join(impl_dir, impl, '.manifest')
		try:
			ms = open(manifest_path, 'rt')
		except OSError as ex:
87 warn(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"), {'manifest': manifest_path, 'exception': str(ex)})
			continue

		if alg == 'sha1': continue

		man_size += os.path.getsize(manifest_path)

		dir = ""
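		# Each manifest line describes one entry (fields as parsed below):
		#   D /dir                      - directory
		#   S digest size target        - symlink (counted, never hard-linked)
		#   F digest mtime size name    - regular file
		#   X digest mtime size name    - executable file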
		for line in ms:
			if line[0] == 'D':
				itype, path = line.split(' ', 1)
				assert path.startswith('/')
				dir = path[1:-1]	# Strip slash and newline
				continue

			if line[0] == "S":
				itype, digest, size, rest = line.split(' ', 3)
				uniq_size += int(size)
				continue

			assert line[0] in "FX"

			itype, digest, mtime, size, path = line.split(' ', 4)
			path = path[:-1]	# Strip newline
			size = int(size)

			key = (itype, digest, mtime, size)
			loc_path = (impl, dir, path)
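			# Two files count as duplicates only if type, digest, mtime and
			# size all match; the first copy seen becomes the link target
			# for any later copies.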

			first_loc = first_copy.get(key, None)
			if first_loc:
				first_full = os.path.join(impl_dir, *first_loc)
				new_full = os.path.join(impl_dir, *loc_path)
				if _already_linked(first_full, new_full):
					already_linked += size
				else:
					_link(first_full, new_full, tmpfile)
					dup_size += size
			else:
				first_copy[key] = loc_path
				uniq_size += size
	clear()
	return (uniq_size, dup_size, already_linked, man_size)