1 """Optimise the cache."""
# Copyright (C) 2009, Thomas Leonard
# See the README file for details, or visit http://0install.net.
from __future__ import print_function

import os
import random
import sys
from logging import warn

from zeroinstall import _
12 def _already_linked(a
, b
):
15 return (ai
.st_dev
, ai
.st_ino
) == (bi
.st_dev
, bi
.st_ino
)
17 def _byte_identical(a
, b
):
def _link(a, b, tmpfile):
    """Keep 'a', delete 'b' and hard-link to 'a'.

    The link is first created under the name 'tmpfile' and then renamed
    over 'b', so 'b' is replaced in a single rename step rather than being
    deleted first. The directory containing 'b' is made owner-writable for
    the duration of the operation and its original mode is restored
    afterwards, even if linking fails.

    @param a: path of the file to keep
    @param b: path of the duplicate to replace with a hard link to 'a'
    @param tmpfile: unused path to use for the temporary link
    @raise OSError: if the link, rename or chmod fails"""
    if not _byte_identical(a, b):
        # NOTE(review): only a warning is logged here; 'b' is still replaced
        # below. Confirm that is the intended policy for a content mismatch.
        warn(_("Files should be identical, but they're not!\n%(file_a)s\n%(file_b)s"), {'file_a': a, 'file_b': b})

    b_dir = os.path.dirname(b)
    old_mode = os.lstat(b_dir).st_mode
    os.chmod(b_dir, old_mode | 0o200)  # Need write access briefly
    try:
        os.link(a, tmpfile)
        try:
            os.rename(tmpfile, b)
        except BaseException:
            # Do not leave the temporary link behind if the rename failed.
            os.unlink(tmpfile)
            raise
    finally:
        # Always put the directory permissions back as they were.
        os.chmod(b_dir, old_mode)
def optimise(impl_dir):
    """Scan an implementation cache directory for duplicate files, and
    hard-link any duplicates together to save space.

    Every sub-directory whose name contains '=' is treated as an
    implementation and its '.manifest' file is read. Manifest lines are
    handled by their first character: 'D' sets the current directory, 'S'
    adds its size to the unique total, and 'F'/'X' entries are grouped by
    (type, digest, mtime, size). The first entry seen for a key is kept;
    later entries with the same key are hard-linked to it unless they
    already share its inode.

    @param impl_dir: a $cache/0install.net/implementations directory
    @return: (unique bytes, duplicated bytes, already linked, manifest size)
    @rtype: (int, int, int, int)
    @raise Exception: if no unused temporary file name could be found"""

    first_copy = {}  # TypeDigest -> Path
    dup_size = uniq_size = already_linked = man_size = 0

    # Pick a scratch name inside impl_dir for _link() to create links under.
    # NB: the loop variable must not be '_', which is the translation function.
    for attempt in range(10):
        tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
        if not os.path.exists(tmpfile):
            break
    else:
        raise Exception(_("Can't generate unused tempfile name!"))

    dirs = os.listdir(impl_dir)
    total = len(dirs)
    msg = ""

    def clear():
        # Blank out the previous progress message on the current line.
        print("\r" + (" " * len(msg)) + "\r", end='')

    for i, impl in enumerate(dirs):
        clear()
        msg = _("[%(done)d / %(total)d] Reading manifests...") % {'done': i, 'total': total}
        print(msg, end='')
        sys.stdout.flush()

        if impl.startswith('.') or '=' not in impl:
            warn(_("Skipping non-implementation '%s'"), impl)
            continue
        manifest_path = os.path.join(impl_dir, impl, '.manifest')
        try:
            # Text mode so that lines are str on both Python 2 and Python 3.
            ms = open(manifest_path, 'rt')
        except EnvironmentError as ex:
            # The dict key must match the %(manifest_path)s placeholder.
            warn(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"), {'manifest_path': manifest_path, 'exception': str(ex)})
            continue

        with ms:
            alg = impl.split('=', 1)[0]
            # NOTE(review): sha1 implementations are skipped entirely;
            # presumably their manifest layout differs from what is parsed
            # below - confirm against the manifest format documentation.
            if alg == 'sha1':
                continue

            man_size += os.path.getsize(manifest_path)

            subdir = ""
            for line in ms:
                if line[0] == 'D':
                    itype, path = line.split(' ', 1)
                    assert path.startswith('/')
                    subdir = path[1:-1]  # Strip slash and newline
                    continue

                if line[0] == "S":
                    itype, digest, size, rest = line.split(' ', 3)
                    uniq_size += int(size)
                    continue

                assert line[0] in "FX"

                itype, digest, mtime, size, path = line.split(' ', 4)
                path = path[:-1]  # Strip newline
                size = int(size)

                key = (itype, digest, mtime, size)
                loc_path = (impl, subdir, path)

                first_loc = first_copy.get(key, None)
                if first_loc is not None:
                    first_full = os.path.join(impl_dir, *first_loc)
                    new_full = os.path.join(impl_dir, *loc_path)
                    if _already_linked(first_full, new_full):
                        already_linked += size
                    else:
                        _link(first_full, new_full, tmpfile)
                        dup_size += size
                else:
                    first_copy[key] = loc_path
                    uniq_size += size

    clear()
    return (uniq_size, dup_size, already_linked, man_size)