1 """Optimise the cache."""
3 # Copyright (C) 2009, Thomas Leonard
4 # See the README file for details, or visit http://0install.net.
import io
import os
import random
from logging import warn

from zeroinstall import _
def _already_linked(a, b):
    """Return True if paths 'a' and 'b' are the same file on disk.

    Two paths name the same file when they agree on both the device and
    the inode number. lstat() is used so that a symlink is compared as
    itself rather than as the file it points to.

    @param a: path of the first file
    @param b: path of the second file
    @return: whether both paths share (st_dev, st_ino)
    @rtype: bool
    @raise OSError: if either path cannot be stat'ed"""
    ai = os.lstat(a)
    bi = os.lstat(b)
    return (ai.st_dev, ai.st_ino) == (bi.st_dev, bi.st_ino)
def _byte_identical(a, b):
    """Return True if files 'a' and 'b' have exactly the same contents.

    Both files are read in binary mode, one fixed-size chunk at a time, so
    the comparison stops at the first difference and never holds a whole
    file in memory.

    @param a: path of the first file
    @param b: path of the second file
    @return: whether the two files contain the same bytes
    @rtype: bool
    @raise IOError: if either file cannot be opened or read"""
    chunk_size = 64 * 1024
    with open(a, 'rb') as af, open(b, 'rb') as bf:
        while True:
            adata = af.read(chunk_size)
            bdata = bf.read(chunk_size)
            if adata != bdata:
                return False
            if not adata:
                # Both streams ended together without a mismatch.
                return True
def _link(a, b, tmpfile):
    """Keep 'a', delete 'b' and hard-link to 'a'.

    The link is first created under the scratch name 'tmpfile' and then
    renamed over 'b', so 'b' is replaced in a single rename step rather
    than being removed and re-created.

    @param a: path of the file to keep
    @param b: path of the duplicate to replace with a hard link to 'a'
    @param tmpfile: unused path where the link is created before the rename
    @raise OSError: if the stat, chmod, link or rename fails"""
    if not _byte_identical(a, b):
        # NOTE(review): only a warning is logged here; the replacement below
        # still goes ahead. Confirm that this is the intended policy.
        warn(_("Files should be identical, but they're not!\n%(file_a)s\n%(file_b)s"), {'file_a': a, 'file_b': b})

    b_dir = os.path.dirname(b)
    old_mode = os.lstat(b_dir).st_mode
    os.chmod(b_dir, old_mode | 0o200)  # Need write access briefly
    try:
        os.link(a, tmpfile)
        try:
            os.rename(tmpfile, b)
        except BaseException:
            # Don't leave the scratch link behind if the rename failed.
            os.unlink(tmpfile)
            raise
    finally:
        # Always put the directory's original permissions back.
        os.chmod(b_dir, old_mode)
def optimise(impl_dir):
    """Scan an implementation cache directory for duplicate files, and
    hard-link any duplicates together to save space.

    Each entry of impl_dir whose name contains '=' and does not start with
    '.' is treated as an implementation, and its '.manifest' file is read.
    Entries using the 'sha1' algorithm are skipped. File lines (type 'F' or
    'X') with the same (type, digest, mtime, size) are considered duplicates
    of the first such file seen; later copies are replaced by hard links to
    it unless they already share its inode.

    @param impl_dir: a $cache/0install.net/implementations directory
    @return: (unique bytes, duplicated bytes, already linked, manifest size)
    @rtype: (int, int, int, int)
    @raise Exception: if no unused temporary file name could be found"""
    first_copy = {}  # (type, digest, mtime, size) -> (impl, directory, name)
    dup_size = uniq_size = already_linked = man_size = 0

    # Pick a scratch name inside impl_dir that is not currently in use.
    # (The loop variable must not be called '_': that is the translation
    # function.)
    for _attempt in range(10):
        tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
        if not os.path.exists(tmpfile):
            break
    else:
        raise Exception(_("Can't generate unused tempfile name!"))

    for impl in os.listdir(impl_dir):
        if impl.startswith('.') or '=' not in impl:
            warn(_("Skipping non-implementation '%s'"), impl)
            continue

        manifest_path = os.path.join(impl_dir, impl, '.manifest')
        try:
            # Text mode so lines compare against str on Python 3 as well;
            # newline='\n' disables universal-newline translation so a '\r'
            # inside a file name is left alone.
            ms = io.open(manifest_path, 'r', encoding='utf-8', newline='\n')
        except (IOError, OSError) as ex:
            # The dict keys must match the names used in the format string.
            warn(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"), {'manifest_path': manifest_path, 'exception': str(ex)})
            continue

        with ms:
            alg = impl.split('=', 1)[0]
            if alg == 'sha1':
                continue

            man_size += os.path.getsize(manifest_path)

            directory = ""
            for line in ms:
                kind = line[0]

                if kind == 'D':
                    # Directory line: later file lines are relative to it.
                    itype, path = line.split(' ', 1)
                    assert path.startswith('/')
                    directory = path[1:-1]  # Strip slash and newline
                    continue

                if kind == 'S':
                    # Counted as unique and never linked.
                    # (Presumably symlink entries -- confirm against the
                    # manifest format.)
                    itype, digest, size, rest = line.split(' ', 3)
                    uniq_size += int(size)
                    continue

                assert kind in "FX"

                itype, digest, mtime, size, path = line.split(' ', 4)
                path = path[:-1]  # Strip newline
                size = int(size)  # The counters below are integers

                key = (itype, digest, mtime, size)
                loc_path = (impl, directory, path)

                first_loc = first_copy.get(key, None)
                if first_loc is None:
                    # First time this content is seen: remember where it is.
                    first_copy[key] = loc_path
                    uniq_size += size
                    continue

                first_full = os.path.join(impl_dir, *first_loc)
                new_full = os.path.join(impl_dir, *loc_path)
                if _already_linked(first_full, new_full):
                    already_linked += size
                else:
                    _link(first_full, new_full, tmpfile)
                    dup_size += size

    return (uniq_size, dup_size, already_linked, man_size)