Added Bas van Gompel.
[findutils.git] / locate / frcode.c
blob4f20b589ed09286966242c6c5741e40cd3e26c52
1 /* frcode -- front-compress a sorted list
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: frcode < sorted-list > compressed-list
22 Uses front compression (also known as incremental encoding);
23 see ";login:", March 1983, p. 8.
25 The input is a sorted list of NUL-terminated strings.
26 (FIXME newline-terminated, until we figure out how to sort
27 NUL-terminated strings.)
29 The output entries are in the same order as the input;
30 each entry consists of an offset-differential count byte
31 (the additional number of characters of prefix of the preceding entry to
32 use beyond the number that the preceding entry is using of its predecessor),
33 followed by a null-terminated ASCII remainder.
35 If the offset-differential count is larger than can be stored
36 in a byte (+/-127), the byte has the value LOCATEDB_ESCAPE
37 and the count follows in a 2-byte word, with the high byte first
38 (network byte order).
40 Example:
42 Input, with NULs changed to newlines:
43 /usr/src
44 /usr/src/cmd/aardvark.c
45 /usr/src/cmd/armadillo.c
46 /usr/tmp/zoo
48 Length of the longest prefix of the preceding entry to share:
49 0 /usr/src
50 8 /cmd/aardvark.c
51 14 rmadillo.c
52 5 tmp/zoo
54 Output, with NULs changed to newlines and count bytes made printable:
55 0 LOCATE02
56 0 /usr/src
57 8 /cmd/aardvark.c
58 6 rmadillo.c
59 -9 tmp/zoo
61 (6 = 14 - 8, and -9 = 5 - 14)
63 Written by James A. Woods <jwoods@adobe.com>.
64 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
66 #include <config.h>
67 #include <stdio.h>
68 #include <sys/types.h>
70 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
71 #include <string.h>
72 #else
73 #include <strings.h>
74 #endif
76 #ifdef STDC_HEADERS
77 #include <stdlib.h>
78 #endif
80 #if ENABLE_NLS
81 # include <libintl.h>
82 # define _(Text) gettext (Text)
83 #else
84 # define _(Text) Text
85 #define textdomain(Domain)
86 #define bindtextdomain(Package, Directory)
87 #endif
88 #ifdef gettext_noop
89 # define N_(String) gettext_noop (String)
90 #else
91 /* We used to use (String) instead of just String, but apparently ISO C
92 * doesn't allow this (at least, that's what HP said when someone reported
93 * this as a compiler bug). This is HP case number 1205608192. See
94 * also http://gcc.gnu.org/bugzilla/show_bug.cgi?id=11250 (which references
95 * ANSI 3.5.7p14-15). The Intel icc compiler also rejects constructs
96 * like: static const char buf[] = ("string");
98 # define N_(String) String
99 #endif
102 #include "locatedb.h"
103 #include <getline.h>
104 #include <getopt.h>
106 char *xmalloc PARAMS((size_t));
108 /* The name this program was run with. */
109 char *program_name;
/* Write the low 16 bits of C to FP as two bytes, high byte first
   (network byte order).

   C may be negative.  It is converted to unsigned before being split,
   because right-shifting a negative signed value is
   implementation-defined, whereas the signed-to-unsigned conversion is
   fully defined and yields the two's-complement bit pattern.

   Values that do not fit in 16 bits are truncated to their low 16
   bits.  The results of putc are not examined here; a caller that
   cares about write errors has to test ferror (FP) itself.  */

static void
put_short (int c, FILE *fp)
{
  unsigned int bits = (unsigned int) c & 0xffffu;

  putc ((int) (bits >> 8), fp);
  putc ((int) (bits & 0xffu), fp);
}
/* Return the length of the longest common prefix of the
   NUL-terminated strings S1 and S2, that is, the number of leading
   characters that are equal in both before either string ends.
   Neither string is modified.  */

static int
prefix_length (const char *s1, const char *s2)
{
  const char *start = s1;

  /* Stop at the first mismatch or at the end of S1.  If S2 is the
     shorter string, its terminating NUL mismatches S1 and ends the
     scan.  */
  while (*s1 != '\0' && *s1 == *s2)
    {
      s1++;
      s2++;
    }

  return (int) (s1 - start);
}
/* Long command-line options handed to getopt_long.  Every entry has a
   NULL flag pointer, so getopt_long returns the entry's last field,
   which is the equivalent short option character.  The entry with a
   NULL name ends the table.  */
static const struct option longopts[] =
{
  { "help",    no_argument, NULL, 'h' },
  { "version", no_argument, NULL, 'v' },
  { "null",    no_argument, NULL, '0' },
  { NULL,      no_argument, NULL, 0   }
};
/* Version text printed by the --version option.  Only declared here;
   the definition is not in this file.  */
extern char *version_string;

/* program_name is already defined near the top of this file, so the
   redundant second definition that used to follow here was dropped.  */
146 static void
147 usage (FILE *stream)
149 fprintf (stream,
150 _("Usage: %s [-0 | --null] [--version] [--help]\n"),
151 program_name);
152 fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
157 main (int argc, char **argv)
159 char *path; /* The current input entry. */
160 char *oldpath; /* The previous input entry. */
161 size_t pathsize, oldpathsize; /* Amounts allocated for them. */
162 int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
163 int line_len; /* Length of input line. */
164 int delimiter = '\n';
165 int optc;
167 program_name = argv[0];
169 pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
170 path = xmalloc (pathsize);
171 oldpath = xmalloc (oldpathsize);
173 /* Set to anything not starting with a slash, to force the first
174 prefix count to 0. */
175 strcpy (oldpath, " ");
176 oldcount = 0;
179 while ((optc = getopt_long (argc, argv, "hv0", longopts, (int *) 0)) != -1)
180 switch (optc)
182 case '0':
183 delimiter = 0;
184 break;
186 case 'h':
187 usage (stdout);
188 return 0;
190 case 'v':
191 printf (_("GNU locate version %s\n"), version_string);
192 return 0;
194 default:
195 usage (stderr);
196 return 1;
199 /* We expect to have no arguments. */
200 if (optind != argc)
202 usage (stderr);
203 return 1;
208 fwrite (LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC), 1, stdout);
210 while ((line_len = getdelim (&path, &pathsize, delimiter, stdin)) > 0)
212 path[line_len - 1] = '\0'; /* FIXME temporary: nuke the newline. */
214 count = prefix_length (oldpath, path);
215 diffcount = count - oldcount;
216 oldcount = count;
217 /* If the difference is small, it fits in one byte;
218 otherwise, two bytes plus a marker noting that fact. */
219 if (diffcount < -127 || diffcount > 127)
221 putc (LOCATEDB_ESCAPE, stdout);
222 put_short (diffcount, stdout);
224 else
225 putc (diffcount, stdout);
227 fputs (path + count, stdout);
228 putc ('\0', stdout);
231 /* Swap path and oldpath and their sizes. */
232 char *tmppath = oldpath;
233 size_t tmppathsize = oldpathsize;
234 oldpath = path;
235 oldpathsize = pathsize;
236 path = tmppath;
237 pathsize = tmppathsize;
241 free (path);
242 free (oldpath);
244 return 0;