* locate/frcode.c, locate/bigram.c: include headers from ../lib
[findutils.git] / locate / frcode.c
blob00bdeedf1cfb9c4bed1f57404ef91fb6e64555d6
1 /* frcode -- front-compress a sorted list
2 Copyright (C) 1994 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18 /* Usage: frcode < sorted-list > compressed-list
20 Uses front compression (also known as incremental encoding);
21 see ";login:", March 1983, p. 8.
23 The input is a sorted list of NUL-terminated strings.
24 (FIXME newline-terminated, until we figure out how to sort
25 NUL-terminated strings.)
27 The output entries are in the same order as the input;
28 each entry consists of an offset-differential count byte
29 (the additional number of characters of prefix of the preceding entry to
30 use beyond the number that the preceding entry is using of its predecessor),
31 followed by a null-terminated ASCII remainder.
33 If the offset-differential count is larger than can be stored
34 in a byte (+/-127), the byte has the value LOCATEDB_ESCAPE
35 and the count follows in a 2-byte word, with the high byte first
36 (network byte order).
38 Example:
40 Input, with NULs changed to newlines:
41 /usr/src
42 /usr/src/cmd/aardvark.c
43 /usr/src/cmd/armadillo.c
44 /usr/tmp/zoo
46 Length of the longest prefix of the preceding entry to share:
47 0 /usr/src
48 8 /cmd/aardvark.c
49 14 rmadillo.c
50 5 tmp/zoo
52 Output, with NULs changed to newlines and count bytes made printable:
53 0 LOCATE02
54 0 /usr/src
55 8 /cmd/aardvark.c
56 6 rmadillo.c
57 -9 tmp/zoo
59 (6 = 14 - 8, and -9 = 5 - 14)
61 Written by James A. Woods <jwoods@adobe.com>.
62 Modified by David MacKenzie <djm@gnu.ai.mit.edu>. */
64 #include <config.h>
65 #include <stdio.h>
66 #include <sys/types.h>
68 #if defined(HAVE_STRING_H) || defined(STDC_HEADERS)
69 #include <string.h>
70 #else
71 #include <strings.h>
72 #endif
74 #ifdef STDC_HEADERS
75 #include <stdlib.h>
76 #endif
78 #include "locatedb.h"
79 #include <getline.h>
81 char *xmalloc ();
83 /* The name this program was run with. */
84 char *program_name;
86 /* Write out a 16-bit int, high byte first (network byte order). */
88 static void
89 put_short (c, fp)
90 int c;
91 FILE *fp;
93 putc (c >> 8, fp);
94 putc (c, fp);
97 /* Return the length of the longest common prefix of strings S1 and S2. */
99 static int
100 prefix_length (s1, s2)
101 char *s1, *s2;
103 register char *start;
105 for (start = s1; *s1 == *s2 && *s1 != '\0'; s1++, s2++)
107 return s1 - start;
111 main (argc, argv)
112 int argc;
113 char **argv;
115 char *path; /* The current input entry. */
116 char *oldpath; /* The previous input entry. */
117 size_t pathsize, oldpathsize; /* Amounts allocated for them. */
118 int count, oldcount, diffcount; /* Their prefix lengths & the difference. */
119 int line_len; /* Length of input line. */
121 program_name = argv[0];
123 pathsize = oldpathsize = 1026; /* Increased as necessary by getline. */
124 path = xmalloc (pathsize);
125 oldpath = xmalloc (oldpathsize);
127 /* Set to anything not starting with a slash, to force the first
128 prefix count to 0. */
129 strcpy (oldpath, " ");
130 oldcount = 0;
132 fwrite (LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC), 1, stdout);
134 /* FIXME temporary: change the \n to \0 when we figure out how to sort
135 null-terminated strings. */
136 while ((line_len = getline (&path, &pathsize, stdin)) > 0)
138 path[line_len - 1] = '\0'; /* FIXME temporary: nuke the newline. */
140 count = prefix_length (oldpath, path);
141 diffcount = count - oldcount;
142 oldcount = count;
143 /* If the difference is small, it fits in one byte;
144 otherwise, two bytes plus a marker noting that fact. */
145 if (diffcount < -127 || diffcount > 127)
147 putc (LOCATEDB_ESCAPE, stdout);
148 put_short (diffcount, stdout);
150 else
151 putc (diffcount, stdout);
153 fputs (path + count, stdout);
154 putc ('\0', stdout);
157 /* Swap path and oldpath and their sizes. */
158 char *tmppath = oldpath;
159 size_t tmppathsize = oldpathsize;
160 oldpath = path;
161 oldpathsize = pathsize;
162 path = tmppath;
163 pathsize = tmppathsize;
167 free (path);
168 free (oldpath);
170 exit (0);