Support the generation of slocate-format databases
[findutils.git] / locate / updatedb.sh
blob80033598a847bb46730efd337fa9c4190606a72d
1 #! /bin/sh
2 # updatedb -- build a locate pathname database
3 # Copyright (C) 1994, 1996, 1997, 2000, 2001, 2003, 2004, 2005, 2006
4 # Free Software Foundation, Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2, or (at your option)
9 # any later version.
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
19 # USA.
21 # csh original by James Woods; sh conversion by David MacKenzie.
23 #exec 2> /tmp/updatedb-trace.txt
24 #set -x
# Help text printed by --help and appended to the "invalid option" error.
# $0 is expanded once, here, when the string is assigned.
# --dbformat is shown with a value because the option parser only accepts
# the --dbformat=value form (the formats it recognises are LOCATE02,
# slocate and old).
usage="\
Usage: $0 [--findoptions='-option1 -option2...']
       [--localpaths='dir1 dir2...'] [--netpaths='dir1 dir2...']
       [--prunepaths='dir1 dir2...'] [--prunefs='fs1 fs2...']
       [--output=dbfile] [--netuser=user] [--localuser=user]
       [--old-format] [--dbformat=format] [--version] [--help]

Report bugs to <bug-findutils@gnu.org>."
# Defaults for the settings that can only be changed on the command line.
changeto=/
old=no

# Parse the command line.  Each argument is either a bare flag or has the
# form --name=value; anything unrecognised is a fatal usage error.
for arg
do
  # Split the argument at its first "=".  The second sed expression leaves
  # an argument without "=" unchanged, so for bare flags $val is simply the
  # whole argument and is ignored.
  #
  # "$arg" is quoted and fed through printf so that runs of blanks and glob
  # characters inside an option value reach sed exactly as the user typed
  # them.
  #
  # If we are unable to fork, the back-tick operator will fail (and the
  # shell will emit an error message).  When this happens, we exit with
  # error value 71 (EX_OSERR).  Alternative candidate - 75, EX_TEMPFAIL.
  opt=`printf '%s\n' "$arg" | sed 's/^\([^=]*\).*/\1/'` || exit 71
  val=`printf '%s\n' "$arg" | sed 's/^[^=]*=\(.*\)/\1/'` || exit 71
  case "$opt" in
    --findoptions) FINDOPTIONS="$val" ;;
    --localpaths)  SEARCHPATHS="$val" ;;
    --netpaths)    NETPATHS="$val" ;;
    --prunepaths)  PRUNEPATHS="$val" ;;
    --prunefs)     PRUNEFS="$val" ;;
    --output)      LOCATE_DB="$val" ;;
    --netuser)     NETUSER="$val" ;;
    --localuser)   LOCALUSER="$val" ;;
    --old-format)  old=yes ;;
    --changecwd)   changeto="$val" ;;
    --dbformat)    dbformat="$val" ;;
    --version)     echo "GNU updatedb version @VERSION@"; exit 0 ;;
    --help)        echo "$usage"; exit 0 ;;
    *)             echo "updatedb: invalid option $opt
$usage" >&2
                   exit 1 ;;
  esac
done
# --old-format and --dbformat=... both select the database format, so
# giving the two together is rejected instead of silently picking one.
# ${dbformat:+yes} expands to "yes" only when dbformat is set and non-empty.
case "${dbformat:+yes}_${old}" in
  yes_yes)
    echo "The --dbformat and --old-format options cannot both be specified." >&2
    exit 1
    ;;
  *)
    ;;
esac
# Translate the requested database format into the settings used by the
# build pipeline:
#   old            - "yes" when the old format was requested either way
#   sort           - the sort command line
#   print_option   - the find action that emits each name
#   frcode_options - extra options handed to frcode
if test "$old" = yes || test "$dbformat" = "old" ; then
  echo "Warning: future versions of findutils will shortly discontinue support for the old locate database format." >&2
  old=yes
  sort="@SORT@"
  print_option="-print"
  frcode_options=""
else
  frcode_options=""
  case "$dbformat" in
    "" | LOCATE02)
      # LOCATE02 is also the default when no format was given.
      ;;
    slocate)
      frcode_options="$frcode_options -S 1"
      ;;
    *)
      echo "Unsupported locate database format ${dbformat}: Supported formats are:" >&2
      echo "LOCATE02, slocate, old" >&2
      exit 1
      ;;
  esac

  # @SORT_SUPPORTS_Z@ is replaced when the script is generated; it is run
  # here as a command whose exit status selects the branch.  When it
  # succeeds, names are passed NUL-terminated through find, sort and frcode.
  if @SORT_SUPPORTS_Z@
  then
    sort="@SORT@ -z"
    print_option="-print0"
    frcode_options="$frcode_options -0"
  else
    sort="@SORT@"
    print_option="-print"
  fi
fi
# getuid
# Print the numeric user id of the invoking user on standard output.
getuid() {
  # format of "id" output is ...
  #   uid=1(daemon) gid=1(other)
  # Parsing that (instead of calling "id -u") also works for `id's that
  # don't understand -u: the text before the first "(" is "uid=1", and
  # the field after its "=" is the number.
  id | cut -d'(' -f 1 | cut -d'=' -f2
}
# select_shell USER
# Figure out if su supports the -s option when switching to USER.
# Prints "-s $SHELL" if it does and an empty line otherwise; callers splice
# the output, unquoted, into their own su command line.
#
# $SHELL is deliberately left unquoted in the probes so that they run su
# with the same words the callers will end up passing.
select_shell() {
  if su "$1" -s $SHELL -c false < /dev/null ; then
    # No: "false" was reported as successful, so the probe cannot be
    # trusted to have run under the requested shell.
    echo ""
  elif su "$1" -s $SHELL -c true < /dev/null ; then
    # Yes.
    echo "-s $SHELL"
  else
    # su is unconditionally failing.  We won't be able to
    # figure out what is wrong, so be conservative.
    echo ""
  fi
}
# You can set these in the environment, or use command-line options,
# to override their defaults.  "${VAR=value}" assigns only when VAR is
# unset, so a value that is set but empty is kept as it is.  The
# expansions are double-quoted so that the resulting value is not
# word-split or glob-expanded merely to be discarded by ":".

# Any global options for find?
: "${FINDOPTIONS=}"

# What shell should we use?  We should use a POSIX-ish sh.
: "${SHELL=/bin/sh}"

# Non-network directories to put in the database.
: "${SEARCHPATHS=/}"

# Network (NFS, AFS, RFS, etc.) directories to put in the database.
: "${NETPATHS=}"

# Directories to not put in the database, which would otherwise be.
: "${PRUNEPATHS=/tmp /usr/tmp /var/tmp /afs /amd /sfs}"
# A pruned path written with a trailing slash becomes a regex item that
# is never matched, which is not what the user will expect.  Such paths
# are therefore rejected before any work is done.
for p in $PRUNEPATHS
do
  case "$p" in
    /*/) ;;              # begins and ends with "/": fall through to the error
    *)   continue ;;     # anything else is acceptable
  esac
  echo "$0: $p: pruned paths should not contain trailing slashes" >&2
  exit 1
done
# The pruned paths again, in the form of a regex that find can use.
# A PRUNEREGEX that is already set and non-empty is kept.  Otherwise every
# blank-separated entry of PRUNEPATHS becomes an anchored group \(^path$\)
# and the groups are joined with \|.
if test -z "$PRUNEREGEX"
then
  PRUNEREGEX=`echo $PRUNEPATHS|sed -e 's,^,\\\(^,' -e 's, ,$\\\)\\\|\\\(^,g' -e 's,$,$\\\),'`
fi
# The database file to build.
: "${LOCATE_DB=@LOCATE_DB@}"

# Directory to hold intermediate files.  Unless TMPDIR is already set,
# the first of /var/tmp and /usr/tmp that exists is used, with /tmp as
# the fallback.  It is exported for the programs started later on.
if test -d /var/tmp; then
  : "${TMPDIR=/var/tmp}"
elif test -d /usr/tmp; then
  : "${TMPDIR=/usr/tmp}"
else
  : "${TMPDIR=/tmp}"
fi
export TMPDIR

# The user to search network directories as.
: "${NETUSER=daemon}"
# The directory containing the subprograms.
# ":=" is used so that a LIBEXECDIR that is unset *or empty* receives the
# configured default; a non-empty value from the environment is kept.
: "${LIBEXECDIR:=@libexecdir@}"

# The directory containing find.  Same rule as for LIBEXECDIR.
: "${BINDIR:=@bindir@}"

# The names of the utilities to run to build the database.
# Each may be preset, non-empty, in the environment to override it.
: "${find:=${BINDIR}/@find@}"
: "${frcode:=${LIBEXECDIR}/@frcode@}"
: "${bigram:=${LIBEXECDIR}/@bigram@}"
: "${code:=${LIBEXECDIR}/@code@}"
# checkbinary PATH
# Succeed quietly when PATH is executable; otherwise report the problem on
# standard error and terminate the script with status 1.
checkbinary () {
  if test -x "$1" ; then
    : ok
  else
    # The message is printed directly, not through eval, so a path that
    # contains quotes or other shell metacharacters is shown verbatim
    # rather than being re-parsed as shell code.
    echo "updatedb needs to be able to execute $1, but cannot." >&2
    exit 1
  fi
}
# Stop before doing any work if one of the helper programs is missing or
# not executable; checkbinary prints the reason and exits on failure.
# The list is left unquoted because the same variables are later used
# unquoted as command words; each resulting word is checked on its own.
for binary in $find $frcode $bigram $code
do
  checkbinary "$binary"
done
# Run everything from here on with a fixed search path.
PATH=/bin:/usr/bin:${BINDIR}; export PATH

# Filesystem types that are not to be searched, unless PRUNEFS is already
# set (an empty value disables pruning by filesystem type).
: "${PRUNEFS=nfs NFS proc afs proc smbfs autofs iso9660 ncpfs coda devpts ftpfs devfs mfs sysfs shfs}"

# Turn the list into a find expression fragment of the form
#   -fstype A -o -fstype B -o
# The trailing -o is there because the callers always append a further
# test right after this fragment.
if test -n "$PRUNEFS"; then
  prunefs_exp=`echo $PRUNEFS |sed -e 's/\([^ ][^ ]*\)/-o -fstype \1/g' \
   -e 's/-o //' -e 's/$/ -o/'`
else
  prunefs_exp=''
fi
# Make and code the file list.
# Sort case insensitively for users' convenience.
#
# This section uses the settings established earlier in the script: old,
# changeto, SEARCHPATHS, NETPATHS, LOCALUSER, NETUSER, FINDOPTIONS,
# prunefs_exp, PRUNEREGEX, print_option, sort, frcode_options, LOCATE_DB
# and the tool paths find, frcode, bigram and code.  The new database is
# always written to "$LOCATE_DB.n" first and only then moved into place.

# install_new_db
# Publish "$LOCATE_DB.n" as "$LOCATE_DB", or discard it when it is empty.
# The permissions are set while the file still has its temporary name and
# the old database is replaced by the rename itself; nothing removes it
# beforehand, so locate never finds the database missing or with
# unfinished permissions.
install_new_db() {
  if test -s "$LOCATE_DB.n"; then
    chmod 644 "$LOCATE_DB.n"
    mv -f "$LOCATE_DB.n" "$LOCATE_DB"
  else
    echo "updatedb: new database would be empty" >&2
    rm -f "$LOCATE_DB.n"
  fi
}

rm -f "$LOCATE_DB.n"
trap 'rm -f "$LOCATE_DB.n"; exit' HUP TERM

if test "$old" = no; then
  # LOCATE02 or slocate format
  #
  # The brace group is the first stage of a pipeline, so its "exit"
  # statements end only that stage; the status tested by the enclosing
  # "if" is that of the final stage, frcode.
  if {
    cd "$changeto"
    if test -n "$SEARCHPATHS"; then
      if [ "$LOCALUSER" != "" ]; then
        # : A1
        su $LOCALUSER `select_shell $LOCALUSER` -c \
        "$find $SEARCHPATHS $FINDOPTIONS \
         \\( $prunefs_exp \
         -type d -regex '$PRUNEREGEX' \\) -prune -o $print_option"
      else
        # : A2
        $find $SEARCHPATHS $FINDOPTIONS \
         \( $prunefs_exp \
         -type d -regex "$PRUNEREGEX" \) -prune -o $print_option
      fi
    fi

    if test -n "$NETPATHS"; then
      # Network directories are searched as $NETUSER when we are root.
      myuid=`getuid`
      if [ "$myuid" = 0 ]; then
        # : A3
        su $NETUSER `select_shell $NETUSER` -c \
         "$find $NETPATHS $FINDOPTIONS \\( -type d -regex '$PRUNEREGEX' -prune \\) -o $print_option" ||
        exit $?
      else
        # : A4
        $find $NETPATHS $FINDOPTIONS \( -type d -regex "$PRUNEREGEX" -prune \) -o $print_option ||
        exit $?
      fi
    fi
  } | $sort -f | $frcode $frcode_options > "$LOCATE_DB.n"
  then
    : OK so far
    true
  else
    rv=$?
    echo "Failed to generate $LOCATE_DB.n" >&2
    rm -f "$LOCATE_DB.n"
    exit $rv
  fi

  # To avoid breaking locate while this script is running, put the
  # results in a temp file, then rename it atomically.
  install_new_db

else # old

  if ! bigrams=`mktemp -t updatedbXXXXXXXXX`; then
    echo mktemp failed >&2
    exit 1
  fi

  if ! filelist=`mktemp -t updatedbXXXXXXXXX`; then
    echo mktemp failed >&2
    exit 1
  fi

  rm -f "$LOCATE_DB.n"
  trap 'rm -f "$bigrams" "$filelist" "$LOCATE_DB.n"; exit' HUP TERM

  # Alphabetize subdirectories before file entries using tr.  James Woods says:
  # "to get everything in monotonic collating sequence, to avoid some
  # breakage i'll have to think about."
  {
    cd "$changeto"
    if test -n "$SEARCHPATHS"; then
      if [ "$LOCALUSER" != "" ]; then
        # : A5
        su $LOCALUSER `select_shell $LOCALUSER` -c \
        "$find $SEARCHPATHS $FINDOPTIONS \
         \( $prunefs_exp \
         -type d -regex '$PRUNEREGEX' \) -prune -o $print_option" || exit $?
      else
        # : A6
        $find $SEARCHPATHS $FINDOPTIONS \
         \( $prunefs_exp \
         -type d -regex "$PRUNEREGEX" \) -prune -o $print_option || exit $?
      fi
    fi

    if test -n "$NETPATHS"; then
      myuid=`getuid`
      if [ "$myuid" = 0 ]; then
        # : A7
        su $NETUSER `select_shell $NETUSER` -c \
         "$find $NETPATHS $FINDOPTIONS \\( -type d -regex '$PRUNEREGEX' -prune \\) -o $print_option" ||
        exit $?
      else
        # : A8
        $find $NETPATHS $FINDOPTIONS \( -type d -regex "$PRUNEREGEX" -prune \) -o $print_option ||
        exit $?
      fi
    fi
  } | tr / '\001' | $sort -f | tr '\001' / > "$filelist"

  # Compute the (at most 128) most common bigrams in the file list.
  $bigram $bigram_opts < "$filelist" | sort | uniq -c | sort -nr |
    awk '{ if (NR <= 128) print $2 }' | tr -d '\012' > "$bigrams"

  # Code the file list.
  $code "$bigrams" < "$filelist" > "$LOCATE_DB.n"

  rm -f "$bigrams" "$filelist"

  # To reduce the chances of breaking locate while this script is running,
  # put the results in a temp file, then rename it atomically.
  install_new_db

fi

exit 0