"""Optimise the cache."""

# Copyright (C) 2009, Thomas Leonard
# See the README file for details, or visit http://0install.net.
from __future__ import print_function

import os
import sys

from zeroinstall import _, logger
def _already_linked(a, b):
    """Return True if paths 'a' and 'b' name the same file on disk.

    Two paths are considered linked when they resolve to the same
    (device, inode) pair, i.e. they are hard links to one another (or are
    literally the same path).

    @param a: path of the first file
    @param b: path of the second file
    @return: whether both paths refer to the same inode on the same device
    @rtype: bool
    @raise OSError: if either path cannot be stat'ed"""
    # NOTE(review): the visible source used 'ai'/'bi' without binding them;
    # os.stat() is assumed here (it follows symlinks) - confirm that
    # os.lstat() was not intended.
    ai = os.stat(a)
    bi = os.stat(b)
    return (ai.st_dev, ai.st_ino) == (bi.st_dev, bi.st_ino)
def _byte_identical(a, b):
    """Return True if files 'a' and 'b' have exactly the same contents.

    Both files are opened in binary mode and compared chunk by chunk, so
    large files are never loaded into memory in one piece.

    @param a: path of the first file
    @param b: path of the second file
    @return: whether the two files contain the same bytes
    @rtype: bool
    @raise IOError: if either file cannot be opened or read"""
    # NOTE(review): the comparison loop was missing from the visible source
    # and has been reconstructed; the chunk size is an arbitrary choice.
    chunk_size = 64 * 1024
    with open(a, 'rb') as af:
        with open(b, 'rb') as bf:
            while True:
                adata = af.read(chunk_size)
                bdata = bf.read(chunk_size)
                if adata != bdata:
                    # Covers both differing bytes and differing lengths
                    # (one side hits EOF before the other).
                    return False
                if not adata:
                    # Both sides reached EOF together with no mismatch.
                    return True
def _link(a, b, tmpfile):
    """Keep 'a', delete 'b' and hard-link to 'a'.

    If the two files turn out not to be byte-identical a warning is logged
    and 'b' is left untouched, so differing content is never discarded.

    @param a: path of the file to keep
    @param b: path of the duplicate to replace with a hard link to 'a'
    @param tmpfile: unused scratch path; the new link is created here first
        and then renamed over 'b', so 'b' always exists under its name
    @raise OSError: if the directory cannot be made writable, or the link or
        rename fails (the directory mode is restored in every case)"""
    if not _byte_identical(a, b):
        logger.warning(_("Files should be identical, but they're not!\n%(file_a)s\n%(file_b)s"),
                       {'file_a': a, 'file_b': b})
        # NOTE(review): the visible source fell through and would still have
        # replaced 'b'; returning here is a deliberate safety change.
        return

    b_dir = os.path.dirname(b) or os.curdir
    old_mode = os.lstat(b_dir).st_mode
    os.chmod(b_dir, old_mode | 0o200)  # Need write access briefly
    try:
        # NOTE(review): this link/rename step was missing from the visible
        # source and has been reconstructed from the docstring and the
        # 'tmpfile' parameter.
        os.link(a, tmpfile)
        try:
            os.rename(tmpfile, b)
        except BaseException:
            # Don't leave the scratch link behind if the swap failed.
            os.unlink(tmpfile)
            raise
    finally:
        # Always drop the temporary write permission again.
        os.chmod(b_dir, old_mode)
def optimise(impl_dir):
    """Scan an implementation cache directory for duplicate files, and
    hard-link any duplicates together to save space.

    Each sub-directory whose name parses as an algorithm/digest pair has its
    '.manifest' read. Regular and executable file entries with the same
    (type, digest, mtime, size) are hard-linked to the first copy seen.
    Progress is printed to standard output on a single, rewritten line.

    @param impl_dir: a $cache/0install.net/implementations directory
    @type impl_dir: str
    @return: (unique bytes, duplicated bytes, already linked, manifest size)
    @rtype: (int, int, int, int)
    @raise Exception: if no unused temporary file name could be found"""
    # NOTE(review): the visible source had many lines elided (loop headers,
    # try/except clauses, some counter updates). Control flow below was
    # reconstructed from the remaining tokens - please diff against upstream.
    import random
    from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair

    first_copy = {}  # (type, digest, mtime, size) -> (impl, dir, path)
    dup_size = uniq_size = already_linked = man_size = 0

    # Pick a scratch name inside impl_dir for _link() to create links under.
    # The number of attempts is an assumption; the source only showed the
    # failure message.
    for unused_attempt in range(10):
        tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
        if not os.path.exists(tmpfile):
            break
    else:
        raise Exception(_("Can't generate unused tempfile name!"))

    dirs = os.listdir(impl_dir)
    total = len(dirs)
    msg = ""

    def clear():
        # Blank out the previous progress message and return to column 0.
        print("\r" + (" " * len(msg)) + "\r", end='')

    for i, impl in enumerate(dirs):
        clear()
        msg = _("[%(done)d / %(total)d] Reading manifests...") % {'done': i, 'total': total}
        print(msg, end='')
        sys.stdout.flush()  # no newline was written, so force it out

        try:
            alg = parse_algorithm_digest_pair(impl)[0]
        except BadDigest:
            logger.warning(_("Skipping non-implementation '%s'"), impl)
            continue

        manifest_path = os.path.join(impl_dir, impl, '.manifest')
        try:
            ms = open(manifest_path, 'rt')
        except (IOError, OSError) as ex:
            # The dict key must match the %(manifest_path)s placeholder.
            logger.warning(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"),
                           {'manifest_path': manifest_path, 'exception': str(ex)})
            continue

        with ms:
            # Presumably skipped because the legacy 'sha1' manifest layout
            # differs from what is parsed below - TODO confirm.
            if alg == 'sha1':
                continue

            man_size += os.path.getsize(manifest_path)

            current_dir = ""
            for line in ms:
                if line[0] == 'D':
                    itype, path = line.split(' ', 1)
                    assert path.startswith('/')
                    current_dir = path[1:-1]  # Strip slash and newline
                    continue

                if line[0] == 'S':
                    # Symlink entries only contribute to the unique total.
                    itype, digest, size, rest = line.split(' ', 3)
                    uniq_size += int(size)
                    continue

                assert line[0] in "FX"

                itype, digest, mtime, size, path = line.split(' ', 4)
                path = path[:-1]  # Strip newline
                size = int(size)  # needed for the byte totals below

                # mtime is part of the key: hard-linked names share a single
                # inode, so they cannot keep different timestamps.
                key = (itype, digest, mtime, size)
                loc_path = (impl, current_dir, path)

                first_loc = first_copy.get(key, None)
                if first_loc is None:
                    first_copy[key] = loc_path
                    uniq_size += size
                    continue

                first_full = os.path.join(impl_dir, *first_loc)
                new_full = os.path.join(impl_dir, *loc_path)
                if _already_linked(first_full, new_full):
                    already_linked += size
                else:
                    _link(first_full, new_full, tmpfile)
                    dup_size += size

    clear()
    return (uniq_size, dup_size, already_linked, man_size)