Use gnulib-tool --import to import the gnulib code, rather than the odd way we were...
[findutils.git] / locate / frcode.c
blob3e6e32a638def59b4b3cdbf109899ec0f02ea67b
1 /* frcode -- front-compress a sorted list
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 9 Temple Place - Suite 330, Boston, MA 02111-1307,
17 USA.
20 /* Usage: frcode < sorted-list > compressed-list
22 Uses front compression (also known as incremental encoding);
23 see ";login:", March 1983, p. 8.
25 The input is a sorted list of NUL-terminated strings.
26 (FIXME newline-terminated, until we figure out how to sort
27 NUL-terminated strings.)
29 The output entries are in the same order as the input;
30 each entry consists of an offset-differential count byte
31 (the additional number of characters of prefix of the preceding entry to
32 use beyond the number that the preceding entry is using of its predecessor),
33 followed by a null-terminated ASCII remainder.
35 If the offset-differential count is larger than can be stored
36 in a byte (+/-127), the byte has the value LOCATEDB_ESCAPE
37 and the count follows in a 2-byte word, with the high byte first
38 (network byte order).
40 Example:
42 Input, with NULs changed to newlines:
43 /usr/src
44 /usr/src/cmd/aardvark.c
45 /usr/src/cmd/armadillo.c
46 /usr/tmp/zoo
48 Length of the longest prefix of the preceding entry to share:
49 0 /usr/src
50 8 /cmd/aardvark.c
51 14 rmadillo.c
52 5 tmp/zoo
54 Output, with NULs changed to newlines and count bytes made printable:
55 0 LOCATE02
56 0 /usr/src
57 8 /cmd/aardvark.c
58 6 rmadillo.c
59 -9 tmp/zoo
61 (6 = 14 - 8, and -9 = 5 - 14)
63 Written by James A. Woods <jwoods@adobe.com>.
64 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
66 #include <config.h>
67 #include <stdio.h>
68 #include <sys/types.h>
70 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
71 #include <string.h>
72 #else
73 #include <strings.h>
74 #endif
76 #ifdef STDC_HEADERS
77 #include <stdlib.h>
78 #endif
80 #include "locatedb.h"
81 #include <getline.h>
83 char *xmalloc PARAMS((size_t));
85 /* The name this program was run with. */
86 char *program_name;
88 /* Write out a 16-bit int, high byte first (network byte order). */
90 static void
91 put_short (int c, FILE *fp)
93 putc (c >> 8, fp);
94 putc (c, fp);
97 /* Return the length of the longest common prefix of strings S1 and S2. */
99 static int
100 prefix_length (char *s1, char *s2)
102 register char *start;
104 for (start = s1; *s1 == *s2 && *s1 != '\0'; s1++, s2++)
106 return s1 - start;
110 main (int argc, char **argv)
112 char *path; /* The current input entry. */
113 char *oldpath; /* The previous input entry. */
114 size_t pathsize, oldpathsize; /* Amounts allocated for them. */
115 int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
116 int line_len; /* Length of input line. */
118 program_name = argv[0];
120 pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
121 path = xmalloc (pathsize);
122 oldpath = xmalloc (oldpathsize);
124 /* Set to anything not starting with a slash, to force the first
125 prefix count to 0. */
126 strcpy (oldpath, " ");
127 oldcount = 0;
129 fwrite (LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC), 1, stdout);
131 /* FIXME temporary: change the \n to \0 when we figure out how to sort
132 null-terminated strings. */
133 while ((line_len = getline (&path, &pathsize, stdin)) > 0)
135 path[line_len - 1] = '\0'; /* FIXME temporary: nuke the newline. */
137 count = prefix_length (oldpath, path);
138 diffcount = count - oldcount;
139 oldcount = count;
140 /* If the difference is small, it fits in one byte;
141 otherwise, two bytes plus a marker noting that fact. */
142 if (diffcount < -127 || diffcount > 127)
144 putc (LOCATEDB_ESCAPE, stdout);
145 put_short (diffcount, stdout);
147 else
148 putc (diffcount, stdout);
150 fputs (path + count, stdout);
151 putc ('\0', stdout);
154 /* Swap path and oldpath and their sizes. */
155 char *tmppath = oldpath;
156 size_t tmppathsize = oldpathsize;
157 oldpath = path;
158 oldpathsize = pathsize;
159 path = tmppath;
160 pathsize = tmppathsize;
164 free (path);
165 free (oldpath);
167 return 0;