create_projects_bom.pl: include .no_blob_plain in bom
[girocco/readme.git] / jobd / gc-util-functions.sh
blob31ae8d756f5090aa955fb57a987ab91737663e24
#!/bin/sh

# This is a shell library for common gc related functions
# used by various Girocco scripts.  It is meant to be sourced,
# not executed.

# shlib.sh always sets this, it's an error to source
# this script without having already sourced shlib.sh
# (note that the "exit" terminates the sourcing shell)
[ -n "$var_git_exec_path" ] || exit 2

# default packing options
# (expanded unquoted by the functions below so that it splits into words)
packopts="--depth=50 --window=50 --window-memory=${var_window_memory:-1g}"

# quiet is "-q" unless show_progress is set to something other than "0"
# in which case it is empty
quiet="-q"; [ "${show_progress:-0}" = "0" ] || quiet=
# pidactive <pid>
# returns 0 if a process with the given pid exists (even if we are not
# permitted to signal it) and 1 otherwise
# the diagnostic text from the failed "kill -0" is left in $_result
pidactive() {
	# The "not permitted" check below matches the text of the error message
	# so force the C locale (only inside the command substitution's subshell)
	# to keep that message from being translated.
	if _result="$(LC_ALL=C; export LC_ALL; kill -0 "$1" 2>&1)"; then
		# process exists and we have permission to signal it
		return 0
	fi
	case "$_result" in *"not permitted"*)
		# we do not have permission to signal the process
		# but it must exist for us to have been told that
		return 0
	esac
	# process does not exist
	return 1
}
# createlock <path>
# attempts to exclusively create the file "<path>.lock"
# on success writes the name of the lock file ("<path>.lock") to standard
# output and returns 0
# on failure (after three attempts spaced one second apart) writes nothing
# and returns 1
createlock() {
	# A .lock file should only exist for much less than a second.
	# If we see a stale lock file (> 1h old), remove it and then,
	# just in case, wait 30 seconds for any process whose .lock
	# we might have just removed (it's racy) to finish doing what
	# should take much less than a second to do.
	_stalelock="$(find -L "$1.lock" -maxdepth 1 -mmin +60 -print 2>/dev/null)" || :
	if [ -n "$_stalelock" ]; then
		rm -f "$_stalelock"
		sleep 30
	fi
	for _try in p p n; do
		# with noclobber (-C) set the redirection fails if the file exists
		if (set -C; >"$1.lock") 2>/dev/null; then
			# printf rather than echo so that a name containing
			# backslashes is always output unchanged
			printf '%s\n' "$1.lock"
			return 0
		fi
		# delay and try again (but do not delay after the final try)
		[ "$_try" != "p" ] || sleep 1
	done
	# cannot create lock file
	return 1
}
# Create a gc.pid lockfile (in the current directory)
# $1 => name of variable to store result in
# On success (returns 0):
#   variable named by $1 will contain the name of the newly created lockfile (i.e. "gc.pid")
# On failure (returns 1):
#   variable named by $1 will contain the failure reason
# Uses createlock and pidactive.
v_lock_gc() {
	# be compatible with gc.pid file from newer Git releases
	_lockf='gc.pid'
	_hn="$(hostname)"
	_active=
	if [ "$(createlock "$_lockf")" ]; then
		# If $_lockf is:
		#   1) less than 12 hours old
		#   2) contains two fields (pid hostname) NO trailing NL
		#   3) the hostname is different OR the pid is still alive
		# then we exit as another active process is holding the lock
		if [ "$(find -L "$_lockf" -maxdepth 1 -mmin -720 -print 2>/dev/null)" ]; then
			_apid=
			_ahost=
			# read "fails" when there's no trailing NL but still
			# sets the variables so its status is ignored
			read -r _apid _ahost _ajunk <"$_lockf" || :
			if [ "$_apid" ] && [ "$_ahost" ]; then
				if [ "$_ahost" != "$_hn" ] || pidactive "$_apid"; then
					_active=1
				fi
			fi
		fi
	else
		eval "$1="'"unable to create $_lockf.lock file"'
		return 1
	fi
	if [ -n "$_active" ]; then
		rm -f "$_lockf.lock"
		eval "$1="'"gc already running on machine '\''$_ahost'\'' pid '\''$_apid'\''"'
		return 1
	fi
	# Only report success if the pid file was actually written and moved
	# into place, otherwise we would be claiming a lock we do not hold.
	if printf "%s %s" "$$" "$_hn" >"$_lockf.lock"; then
		chmod 0664 "$_lockf.lock"
		if mv -f "$_lockf.lock" "$_lockf"; then
			eval "$1="'"$_lockf"'
			return 0
		fi
	fi
	rm -f "$_lockf.lock"
	eval "$1="'"unable to create $_lockf file"'
	return 1
}
# make sure combine-packs uses the correct Git executable
# runs combine-packs.sh with $var_git_exec_path and $cfg_basedir/bin placed
# at the front of its PATH; all arguments are passed along unchanged and
# the exit status is that of combine-packs.sh
run_combine_packs() {
	PATH="$var_git_exec_path:$cfg_basedir/bin:$PATH" @basedir@/jobd/combine-packs.sh "$@"
}
# combine the input pack(s) into a new pack (or possibly packs if packSizeLimit set)
# input pack names are read from standard input one per line delimited by the first
# ':', ' ' or '\n' character on the line (which allows gfi-packs to be read directly)
# all arguments, if any, are passed to pack-objects as additional options
# first removes any pre-existing "*.zap*" sentinels that may be leftover from any
# previously aborted "--replace" operations
# returns non-zero on failure
# the current directory must contain the objects/pack directory
combine_packs_std() {
	# failing to clean up old sentinels is not treated as an error
	find -L objects/pack -maxdepth 1 -type f -name '*.zap*' -exec rm -f '{}' + || :
	# $packopts and $quiet are deliberately unquoted so they split into words
	# (and so that an empty $quiet disappears entirely)
	run_combine_packs --replace "$@" $packopts --all-progress-implied $quiet --non-empty
}
# dupe_file <source> <destination>
# duplicate the first file to the name given by the second file making sure that
# the second file appears atomically all-at-once after the copy has been completed
# and does not appear at all if the copy fails (in which case this function fails)
# if the second file already exists this function fails with status 1
# if the file names are the same this function returns immediately with success
# the copy is first made to a temporary "packtmp-XXXXXX" file located in the
# same directory as the destination which is always removed again on failure
dupe_file() {
	[ "$1" != "$2" ] || return 0
	! [ -e "$2" ] || return 1
	# the temporary file must be in the destination's directory
	# so that the final "mv" is a simple rename
	case "$2" in
		*?/?*)	_tmpdir="${2%/*}";;
		*)	_tmpdir=".";;
	esac
	_tmpfile="$(mktemp "${_tmpdir:-.}/packtmp-XXXXXX")" || return 1
	if cp -fp "$1" "$_tmpfile" && mv -f "$_tmpfile" "$2"; then
		return 0
	fi
	# do not leave the temporary file behind when the copy or rename fails
	rm -f "$_tmpfile"
	return 1
}
# rename_pack oldnamepath newnamepath
# both names are given WITHOUT any extension; the .pack, .bitmap and .idx
# files (those that exist) are moved from the old name to the new name
# note that .keep and .bndl files are left untouched and not moved at all!
# returns 0 on success; on incorrect use or failure to move a file a message
# is written to standard error and "exit 1" is executed (which terminates
# the calling shell unless this function is run in a subshell)
# note that the loop variable "ext" is not private to this function
rename_pack() {
	if [ $# -ne 2 ] || [ "$1" = "$2" ]; then
		printf '%s\n' "[$proj] incorrect use of rename_pack function" >&2
		exit 1
	fi
	# Git assumes that if the destination of the rename already exists
	# that it is, in fact, a copy of the same bytes so silently succeeds
	# without doing anything.  We duplicate that logic here.
	# Git checks for the .idx file first before even trying to use a pack
	# so it should be the last moved and the first removed.
	for ext in pack bitmap idx; do
		[ -f "$1.$ext" ] || continue
		# try a hard link first, then a copy and finally just accept
		# a destination that already exists
		ln "$1.$ext" "$2.$ext" >/dev/null 2>&1 ||
		dupe_file "$1.$ext" "$2.$ext" >/dev/null 2>&1 ||
		[ -f "$2.$ext" ] || {
			printf '%s\n' "[$proj] unable to move $1.$ext to $2.$ext" >&2
			exit 1
		}
	done
	for ext in idx pack bitmap; do
		rm -f "$1.$ext"
	done
	return 0
}
# current directory must already be set to the $GIT_DIR
# see if there are "lotsa" loose objects
# "lotsa" is defined as the 17, 68, 71 and 86 object directories existing
# and there being at least 5 total objects between them which corresponds
# to an approximate average of 320 loose objects before this function starts
# returning true and triggering a "mini" gc to pack up loose objects
# only files whose names match the "$octet19*" pattern are counted
# ($octet19 is not set here, it must be provided by the sourcing script)
lotsa_loose_objects() {
	[ -d objects/17 ] && [ -d objects/68 ] && [ -d objects/71 ] && [ -d objects/86 ] || return 1
	# the arithmetic expansion strips any padding "wc -l" adds to its output
	_objs=$(( $(find -L objects/17 objects/68 objects/71 objects/86 -maxdepth 1 -name "$octet19*" -type f -print 2>/dev/null | LC_ALL=C wc -l) ))
	[ "${_objs:-0}" -ge 5 ]
}
# same as lotsa_loose_objects but first runs `git prune-packed` if it can get a gc lock
# returns the lotsa_loose_objects failure status right away if there are not
# "lotsa" loose objects to start with
# returns true (without pruning) if there are but the gc lock cannot be had
# otherwise prunes, releases the gc lock and returns the result of checking again
lotsa_loose_pruned_objects() {
	lotsa_loose_objects || return $?
	v_lock_gc _gclock || return 0
	git prune-packed --quiet
	# on success $_gclock contains the name of the lock file to remove
	rm -f "$_gclock"
	lotsa_loose_objects
}
# a "single object" pack is either a pack containing just one object
# or a pack containing zero objects (which is a degenerate case that
# normally can never happen).  And it must have a name matching
# the pack-<sha1>*.pack pattern where either the "infix" suffix is "_l"
# or the name does not contain any "_" characters at all (and, obviously,
# must have a matching .idx file).  Any .keep, .bundle, or .bitmap
# associated packs are automatically excluded from the count.
# "lotsa" here is defined as 20 or more.
# returns true if there are "lotsa" such packs, false otherwise
lotsa_single_object_packs() {
	__lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo01="$__lpo --exclude-limit 2"
	# the substr call strips the leading "objects/pack/pack-" (18 characters)
	# and the trailing ".pack" (5 characters) from each name before the
	# test for "_" is made
	_sopacks="$(
		list_packs --quiet $_lpo01 objects/pack 2>/dev/null |
		LC_ALL=C awk 'BEGIN {c=0} {$0=substr($0,19,length($0)-23)} !/_/ || /^[0-9a-f]*_l$/ {c+=1} END {print c}'
	)" || :
	[ "${_sopacks:-0}" -ge 20 ]
}
# returns true if either lotsa_loose_pruned_objects or lotsa_single_object_packs is true
# the single object packs check is made first so that the loose objects check
# (which may run `git prune-packed`) is only made when that one is false
lotsa_loose_objects_or_sopacks() {
	lotsa_single_object_packs || lotsa_loose_pruned_objects
}
# pack any existing, non-packed loose objects into a new _l.pack file then run prune-packed
# note that prune-packed is NOT run beforehand -- the caller must do that if needed
# loose objects need not be part of complete commits/trees as --weak-naming is used
# if there end up being too many loose packs, attempt to combine the packs too
# ("too many" is 20 or more packs with the _l suffix)
pack_incremental_loose_objects() {
	# $quiet and $packopts are deliberately unquoted so they split into words
	_lpacks="$(run_combine_packs </dev/null --names --loose --weak-naming --incremental --non-empty --all-progress-implied ${quiet:---progress} $packopts)"
	if [ -n "$_lpacks" ]; then
		# We need to identify these packs later so we don't combine_packs them
		# NOTE(review): rename_pack reports failure using "exit 1" which
		# the "|| :" here cannot catch as this loop is not in a subshell
		for _objpack in $_lpacks; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		git prune-packed $quiet
	fi
	_packs=
	__lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo01="$__lpo --exclude-limit 2"
	_lpol="$__lpo --exclude-no-sfx _l"
	# give any other packs listed using the "--exclude-limit 2" option that
	# do not have a "_" anywhere in their name the _l suffix too
	list_packs --quiet $_lpo01 objects/pack 2>/dev/null |
	while read -r _apack && _apack="${_apack%.pack}" && [ -n "$_apack" ]; do
		case "$_apack" in *_*);;*)
			rename_pack "$_apack" "${_apack}_l" || :
		esac
	done || :
	{ _packs="$(list_packs --quiet --count $_lpol objects/pack || :)" || :; } 2>/dev/null
	[ "${_packs:-0}" -lt 20 ] || {
		combine_small_incremental_loose_packs
		# only go on to combine the larger packs if there
		# are still too many after combining the small ones
		_packs=
		{ _packs="$(list_packs --quiet --count $_lpol objects/pack || :)" || :; } 2>/dev/null
		[ "${_packs:-0}" -lt 20 ] || combine_large_incremental_loose_packs
	}
}
# same as pack_incremental_loose_objects except
# returns true if locked and packed and unlocked or
# false if could not lock (with err in $lockerr)
# the result of the packing itself does not affect the return value
pack_incremental_loose_objects_if_lockable() {
	if v_lock_gc _gclock; then
		pack_incremental_loose_objects || :
		# $_gclock is the name of the lock file in this case
		rm -f "$_gclock"
		return 0
	else
		# $_gclock is the reason for the failure in this case
		lockerr="$_gclock"
		return 1
	fi
}
# combine small _l packs into larger pack(s) using --weak-naming
# we avoid any non _l, keep, bndl or bitmap packs
# the packs to combine are selected using "--object-limit $var_redelta_threshold"
# and combining is repeated for as long as two or more such packs remain
# and progress is being made
# always returns 0
combine_small_incremental_loose_packs() {
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo="$_lpo --exclude-no-sfx _l"
	_lpo="$_lpo --quiet --object-limit $var_redelta_threshold objects/pack"
	while
		_cnt="$(list_packs --count $_lpo)" || :
		test "${_cnt:-0}" -ge 2
	do
		_newp="$(list_packs $_lpo | combine_packs_std --names --weak-naming --no-reuse-delta)"
		# If no new pack names came back (for example because combining
		# failed) going around again could just repeat that forever since
		# zero new packs always passes the "fewer packs" check below.
		[ -n "$_newp" ] || break
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_newp; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		v_cnt _newc $_newp
		# be paranoid and exit the loop if we haven't reduced the number of packs
		[ "$_newc" -lt "$_cnt" ] || break
	done
	return 0
}
# combine large[ish] _l packs into larger pack(s) using --weak-naming
# we avoid any non _l, keep, bndl or bitmap packs
# the packs to combine are selected using "--exclude-limit" with the negated
# value of half of $var_redelta_threshold plus one and combining is repeated
# for as long as two or more such packs remain and progress is being made
# always returns 0
combine_large_incremental_loose_packs() {
	_lpo="--exclude-no-idx --exclude-keep --exclude-bitmap --exclude-bndl"
	_lpo="$_lpo --exclude-no-sfx _l"
	_lpo="$_lpo --quiet --exclude-limit -$(( ( $var_redelta_threshold / 2 ) + 1 )) objects/pack"
	while
		_cnt="$(list_packs --count $_lpo)" || :
		test "${_cnt:-0}" -ge 2
	do
		_newp="$(list_packs $_lpo | combine_packs_std --names --weak-naming)"
		# If no new pack names came back (for example because combining
		# failed) going around again could just repeat that forever since
		# zero new packs always passes the "fewer packs" check below.
		[ -n "$_newp" ] || break
		# We need to identify these packs later so we don't combine_packs them
		for _objpack in $_newp; do
			rename_pack "objects/pack/pack-$_objpack" "objects/pack/pack-${_objpack}_l" || :
		done
		v_cnt _newc $_newp
		# be paranoid and exit the loop if we haven't reduced the number of packs
		[ "$_newc" -lt "$_cnt" ] || break
	done
	return 0
}