5 Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
6 You can use this code in any manner, as long as you leave my name on it
7 and don't hold me responsible for any problems with it.
9 Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun 5 00:27:08 CDT 1988
11 Various improvments + INCORE by moraes@ai.toronto.edu (Mark Moraes)
13 Major reworking by Henry Spencer as part of the C News project.
15 These routines replace dbm as used by the usenet news software
16 (it's not a full dbm replacement by any means). It's fast and
17 simple. It contains no AT&T code.
19 In general, dbz's files are 1/20 the size of dbm's. Lookup performance
20 is somewhat better, while file creation is spectacularly faster, especially
21 if the incore facility is used.
26 #include <sys/types.h>
34 * #ifdef index. "LIA" = "leave it alone unless you know what you're doing".
36 * FUNNYSEEKS SEEK_SET is not 0, get it from <unistd.h>
37 * INDEX_SIZE backward compatibility with old dbz; avoid using this
38 * NMEMORY number of days of memory for use in sizing new table (LIA)
39 * INCORE backward compatibility with old dbz; use dbzincore() instead
40 * DBZDEBUG enable debugging
41 * DEFSIZE default table size (not as critical as in old dbz)
42 * OLDBNEWS default case mapping as in old B News; set NOBUFFER
43 * BNEWS default case mapping as in current B News; set NOBUFFER
44 * DEFCASE default case-map algorithm selector
45 * NOTAGS fseek offsets are strange, do not do tagging (see below)
46 * NPAGBUF size of .pag buffer, in longs (LIA)
47 * SHISTBUF size of ASCII-file buffer, in bytes (LIA)
48 * MAXRUN length of run which shifts to next table (see below) (LIA)
49 * OVERFLOW long-int arithmetic overflow must be avoided, will trap
50 * NOBUFFER do not buffer hash-table i/o, B News locking is defective
62 static int dbzversion
= 3; /* for validating .dir file format */
65 * The dbz database exploits the fact that when news stores a <key,value>
66 * tuple, the `value' part is a seek offset into a text file, pointing to
67 * a copy of the `key' part. This avoids the need to store a copy of
68 * the key in the dbz files. However, the text file *must* exist and be
69 * consistent with the dbz files, or things will fail.
71 * The basic format of the database is a simple hash table containing the
72 * values. A value is stored by indexing into the table using a hash value
73 * computed from the key; collisions are resolved by linear probing (just
74 * search forward for an empty slot, wrapping around to the beginning of
75 * the table if necessary). Linear probing is a performance disaster when
76 * the table starts to get full, so a complication is introduced. The
77 * database is actually one *or more* tables, stored sequentially in the
78 * .pag file, and the length of linear-probe sequences is limited. The
79 * search (for an existing item or an empty slot) always starts in the
80 * first table, and whenever MAXRUN probes have been done in table N,
81 * probing continues in table N+1. This behaves reasonably well even in
82 * cases of massive overflow. There are some other small complications
83 * added, see comments below.
85 * The table size is fixed for any particular database, but is determined
86 * dynamically when a database is rebuilt. The strategy is to try to pick
87 * the size so the first table will be no more than 2/3 full, that being
88 * slightly before the point where performance starts to degrade. (It is
89 * desirable to be a bit conservative because the overflow strategy tends
90 * to produce files with holes in them, which is a nuisance.)
94 * The following is for backward compatibility.
97 #define DEFSIZE INDEX_SIZE
101 * ANSI C says the offset argument to fseek is a long, not an off_t, for some
102 * reason. Let's use off_t anyway.
104 #define SOF (sizeof(off_t))
107 * We assume that unused areas of a binary file are zeros, and that the
108 * bit pattern of `(off_t)0' is all zeros. The alternative is rather
109 * painful file initialization. Note that okayvalue(), if OVERFLOW is
110 * defined, knows what value of an offset would cause overflow.
112 #define VACANT ((off_t)0)
113 #define BIAS(o) ((o)+1) /* make any valid off_t non-VACANT */
114 #define UNBIAS(o) ((o)-1) /* reverse BIAS() effect */
117 * In a Unix implementation, or indeed any in which an off_t is a byte
118 * count, there are a bunch of high bits free in an off_t. There is a
119 * use for them. Checking a possible hit by looking it up in the base
120 * file is relatively expensive, and the cost can be dramatically reduced
121 * by using some of those high bits to tag the value with a few more bits
122 * of the key's hash. This detects most false hits without the overhead of
123 * seek+read+strcmp. We use the top bit to indicate whether the value is
124 * tagged or not, and don't tag a value which is using the tag bits itself.
125 * We're in trouble if the off_t representation wants to use the top bit.
126 * The actual bitmasks and offset come from the configuration stuff,
127 * which permits fiddling with them as necessary, and also suppressing
128 * them completely (by defining the masks to 0). We build pre-shifted
129 * versions of the masks for efficiency.
131 static off_t tagbits
; /* pre-shifted tag mask */
132 static off_t taghere
; /* pre-shifted tag-enable bit */
133 static off_t tagboth
; /* tagbits|taghere */
134 #define HASTAG(o) ((o)&taghere)
135 #define TAG(o) ((o)&tagbits)
136 #define NOTAG(o) ((o)&~tagboth)
137 #define CANTAG(o) (((o)&tagboth) == 0)
138 #define MKTAG(v) (((v)<<conf.tagshift)&tagbits)
141 * A new, from-scratch database, not built as a rebuild of an old one,
142 * needs to know table size, casemap algorithm, and tagging. Normally
143 * the user supplies this info, but there have to be defaults.
146 #define DEFSIZE 120011 /* 300007 might be better */
149 #define DEFCASE '0' /* B2.10 -- no mapping */
150 #define NOBUFFER /* B News locking is defective */
153 #define DEFCASE '=' /* B2.11 -- all mapped */
154 #define NOBUFFER /* B News locking is defective */
156 #ifndef DEFCASE /* C News compatibility is the default */
157 #define DEFCASE 'C' /* C News -- RFC822 mapping */
160 #define TAGENB 0x80 /* tag enable is top bit, tag is next 7 */
164 #define TAGENB 0 /* no tags */
170 * We read configuration info from the .dir file into this structure,
171 * so we can avoid wired-in assumptions for an existing database.
173 * Among the info is a record of recent peak usages, so that a new table
174 * size can be chosen intelligently when rebuilding. 10 is a good
175 * number of usages to keep, since news displays marked fluctuations
176 * in volume on a 7-day cycle.
179 int olddbz
; /* .dir file empty but .pag not? */
180 off_t tsize
; /* table size */
182 # define NMEMORY 10 /* # days of use info to remember */
184 # define NUSEDS (1+NMEMORY)
185 off_t used
[NUSEDS
]; /* entries used today, yesterday, ... */
186 int valuesize
; /* size of table values, == SOF */
187 int bytemap
[SOF
]; /* byte-order map */
188 char casemap
; /* case-mapping algorithm (see cipoint()) */
189 char fieldsep
; /* field separator in base file, if any */
190 off_t tagenb
; /* unshifted tag-enable bit */
191 off_t tagmask
; /* unshifted tag mask */
192 int tagshift
; /* shift count for tagmask and tagenb */
194 static struct dbzconfig conf
;
195 static int getconf();
197 static int putconf();
198 static void mybytemap();
199 static off_t
bytemap();
202 * For a program that makes many, many references to the database, it
203 * is a large performance win to keep the table in core, if it will fit.
204 * Note that this does hurt robustness in the event of crashes, and
205 * dbmclose() *must* be called to flush the in-core database to disk.
206 * The code is prepared to deal with the possibility that there isn't
207 * enough memory. There *is* an assumption that a size_t is big enough
208 * to hold the size (in bytes) of one table, so dbminit() tries to figure
209 * out whether this is possible first.
211 * The preferred way to ask for an in-core table is to do dbzincore(1)
212 * before dbminit(). The default is not to do it, although -DINCORE
213 * overrides this for backward compatibility with old dbz.
215 * We keep only the first table in core. This greatly simplifies the
216 * code, and bounds memory demand. Furthermore, doing this is a large
217 * performance win even in the event of massive overflow.
220 static int incore
= 1;
222 static int incore
= 0;
226 * Stdio buffer for .pag reads. Buffering more than about 16 does not help
227 * significantly at the densities we try to maintain, and the much larger
228 * buffers that most stdios default to are much more expensive to fill.
229 * With small buffers, stdio is performance-competitive with raw read(),
230 * and it's much more portable.
237 static off_t pagbuf
[NPAGBUF
]; /* only needed if !NOBUFFER && _IOFBF */
242 * Stdio buffer for base-file reads. Message-IDs (all news ever needs to
243 * read) are essentially never longer than 64 bytes, and the typical stdio
244 * buffer is so much larger that it is much more expensive to fill.
250 static char basebuf
[SHISTBUF
]; /* only needed if _IOFBF exists */
254 * Data structure for recording info about searches.
257 off_t place
; /* current location in file */
258 int tabno
; /* which table we're in */
259 int run
; /* how long we'll stay in this table */
263 long hash
; /* the key's hash code (for optimization) */
264 off_t tag
; /* tag we are looking for */
265 int seen
; /* have we examined current location? */
266 int aborted
; /* has i/o error aborted search? */
269 #define FRESH ((struct searcher *)NULL)
270 static off_t
search();
271 #define NOTFOUND ((off_t)-1)
272 static int okayvalue();
276 * Arguably the searcher struct for a given routine ought to be local to
277 * it, but a fetch() is very often immediately followed by a store(), and
278 * in some circumstances it is a useful performance win to remember where
279 * the fetch() completed. So we use a global struct and remember whether
282 static struct searcher srch
;
283 static struct searcher
*prevp
; /* &srch or FRESH */
285 /* byte-ordering stuff */
286 static int mybmap
[SOF
]; /* my byte order (see mybytemap()) */
287 static int bytesame
; /* is database order same as mine? */
288 #define MAPIN(o) ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
289 #define MAPOUT(o) ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
292 * The double parentheses needed to make this work are ugly, but the
293 * alternative (under most compilers) is to pack around 2K of unused
294 * strings -- there's just no way to get rid of them.
296 static int debug
; /* controlled by dbzdebug() */
298 #define DEBUG(args) if (debug) { (void) printf args ; }
300 #define DEBUG(args) ;
305 static void crcinit();
306 static char *cipoint();
307 static char *mapcase();
308 static int isprime();
309 static FILE *latebase();
311 /* file-naming stuff */
312 static char dir
[] = ".dir";
313 static char pag
[] = ".pag";
314 static char *enstring();
316 /* central data structures */
317 static FILE *basef
; /* descriptor for base file */
318 static char *basefname
; /* name for not-yet-opened base file */
319 static FILE *dirf
; /* descriptor for .dir file */
320 static int dirronly
; /* dirf open read-only? */
321 static FILE *pagf
= NULL
; /* descriptor for .pag file */
322 static off_t pagpos
; /* posn in pagf; only search may set != -1 */
323 static int pagronly
; /* pagf open read-only? */
324 static off_t
*corepag
; /* incore version of .pag file, if any */
325 static FILE *bufpagf
; /* well-buffered pagf, for incore rewrite */
326 static off_t
*getcore();
327 static int putcore();
328 static int written
; /* has a store() been done? */
331 - dbzfresh - set up a new database, no historical info
333 int /* 0 success, -1 failure */
334 dbzfresh(name
, size
, fs
, cmap
, tagmask
)
335 char *name
; /* base name; .dir and .pag must exist */
336 long size
; /* table size (0 means default) */
337 int fs
; /* field-separator character in base file */
338 int cmap
; /* case-map algorithm (0 means default) */
339 off_t tagmask
; /* 0 default, 1 no tags */
347 DEBUG(("dbzfresh: database already open\n"));
350 if (size
!= 0 && size
< 2) {
351 DEBUG(("dbzfresh: preposterous size (%ld)\n", size
));
355 /* get default configuration */
356 if (getconf((FILE *)NULL
, (FILE *)NULL
, &c
) < 0)
357 return(-1); /* "can't happen" */
359 /* and mess with it as specified */
366 case 'B': /* 2.10 compat */
367 c
.casemap
= '0'; /* '\0' nicer, but '0' printable! */
370 case 'b': /* 2.11 compat */
380 DEBUG(("dbzfresh case map `%c' unknown\n", cmap
));
385 case 0: /* default */
387 case 1: /* no tags */
400 c
.tagenb
= (m
<< 1) & ~m
;
405 fn
= enstring(name
, dir
);
411 DEBUG(("dbzfresh: unable to write config\n"));
414 if (putconf(f
, &c
) < 0) {
418 if (fclose(f
) == EOF
) {
419 DEBUG(("dbzfresh: fclose failure\n"));
423 /* create/truncate .pag */
424 fn
= enstring(name
, pag
);
430 DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
435 /* and punt to dbminit for the hard work */
436 return(dbminit(name
));
440 - dbzsize - what's a good table size to hold this many entries?
444 long contents
; /* 0 means what's the default */
448 if (contents
<= 0) { /* foulup or default inquiry */
449 DEBUG(("dbzsize: preposterous input (%ld)\n", contents
));
452 n
= (contents
/2)*3; /* try to keep table at most 2/3 full */
453 if (!(n
&01)) /* make it odd */
455 DEBUG(("dbzsize: tentative size %ld\n", n
));
456 while (!isprime(n
)) /* and look for a prime */
458 DEBUG(("dbzsize: final size %ld\n", n
));
464 - isprime - is a number prime?
466 * This is not a terribly efficient approach.
468 static int /* predicate */
472 static int quick
[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
477 /* hit the first few primes quickly to eliminate easy ones */
478 /* this incidentally prevents ridiculously small tables */
479 for (ip
= quick
; (div
= *ip
) != 0; ip
++)
481 DEBUG(("isprime: quick result on %ld\n", (long)x
));
485 /* approximate square root of x */
486 for (stop
= x
; x
/stop
< stop
; stop
>>= 1)
490 /* try odd numbers up to stop */
491 for (div
= *--ip
; div
< stop
; div
+= 2)
499 - dbzagain - set up a new database to be a rebuild of an old one
501 int /* 0 success, -1 failure */
502 dbzagain(name
, oldname
)
503 char *name
; /* base name; .dir and .pag must exist */
504 char *oldname
; /* base name; all must exist */
511 register int newtable
;
512 register off_t newsize
;
515 DEBUG(("dbzagain: database already open\n"));
519 /* pick up the old configuration */
520 fn
= enstring(oldname
, dir
);
526 DEBUG(("dbzagain: cannot open old .dir file\n"));
529 i
= getconf(f
, (FILE *)NULL
, &c
);
532 DEBUG(("dbzagain: getconf failed\n"));
539 for (i
= 0; i
< NUSEDS
; i
++) {
543 newtable
= 1; /* hasn't got full usage history yet */
546 DEBUG(("dbzagain: old table has no contents!\n"));
549 for (i
= NUSEDS
-1; i
> 0; i
--)
550 c
.used
[i
] = c
.used
[i
-1];
552 newsize
= dbzsize(top
);
553 if (!newtable
|| newsize
> c
.tsize
) /* don't shrink new table */
557 fn
= enstring(name
, dir
);
563 DEBUG(("dbzagain: unable to write new .dir\n"));
569 DEBUG(("dbzagain: putconf failed\n"));
573 /* create/truncate .pag */
574 fn
= enstring(name
, pag
);
580 DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
585 /* and let dbminit do the work */
586 return(dbminit(name
));
590 - dbminit - open a database, creating it (using defaults) if necessary
592 * We try to leave errno set plausibly, to the extent that underlying
593 * functions permit this, since many people consult it if dbminit() fails.
595 int /* 0 success, -1 failure */
601 register char *dirfname
;
602 register char *pagfname
;
605 DEBUG(("dbminit: dbminit already called once\n"));
610 /* open the .dir file */
611 dirfname
= enstring(name
, dir
);
612 if (dirfname
== NULL
)
614 dirf
= fopen(dirfname
, "r+");
616 dirf
= fopen(dirfname
, "r");
622 DEBUG(("dbminit: can't open .dir file\n"));
626 /* open the .pag file */
627 pagfname
= enstring(name
, pag
);
628 if (pagfname
== NULL
) {
632 pagf
= fopen(pagfname
, "r+b");
634 pagf
= fopen(pagfname
, "rb");
636 DEBUG(("dbminit: .pag open failed\n"));
648 * B News does not do adequate locking on its database accesses.
649 * Why it doesn't get into trouble using dbm is a mystery. In any
650 * case, doing unbuffered i/o does not cure the problem, but does
651 * enormously reduce its incidence.
653 (void) setbuf(pagf
, (char *)NULL
);
656 (void) setvbuf(pagf
, (char *)pagbuf
, _IOFBF
, sizeof(pagbuf
));
660 /* don't free pagfname, need it below */
662 /* open the base file */
663 basef
= fopen(name
, "r");
665 DEBUG(("dbminit: basefile open failed\n"));
666 basefname
= enstring(name
, "");
667 if (basefname
== NULL
) {
678 (void) setvbuf(basef
, basebuf
, _IOFBF
, sizeof(basebuf
));
681 /* pick up configuration */
682 if (getconf(dirf
, pagf
, &conf
) < 0) {
683 DEBUG(("dbminit: getconf failure\n"));
684 (void) fclose(basef
);
689 errno
= EDOM
; /* kind of a kludge, but very portable */
692 tagbits
= conf
.tagmask
<< conf
.tagshift
;
693 taghere
= conf
.tagenb
<< conf
.tagshift
;
694 tagboth
= tagbits
| taghere
;
697 for (i
= 0; i
< SOF
; i
++)
698 if (mybmap
[i
] != conf
.bytemap
[i
])
701 /* get first table into core, if it looks desirable and feasible */
702 s
= (size_t)conf
.tsize
* SOF
;
703 if (incore
&& (off_t
)(s
/SOF
) == conf
.tsize
) {
704 bufpagf
= fopen(pagfname
, (pagronly
) ? "rb" : "r+b");
706 corepag
= getcore(bufpagf
);
717 DEBUG(("dbminit: succeeded\n"));
722 - enstring - concatenate two strings into a malloced area
724 static char * /* NULL if malloc fails */
731 p
= malloc((size_t)strlen(s1
) + (size_t)strlen(s2
) + 1);
733 (void) strcpy(p
, s1
);
734 (void) strcat(p
, s2
);
736 DEBUG(("enstring(%s, %s) out of memory\n", s1
, s2
));
742 - dbmclose - close a database
747 register int ret
= 0;
750 DEBUG(("dbmclose: not opened!\n"));
754 if (fclose(pagf
) == EOF
) {
755 DEBUG(("dbmclose: fclose(pagf) failed\n"));
758 pagf
= basef
; /* ensure valid pointer; dbzsync checks it */
761 if (bufpagf
!= NULL
&& fclose(bufpagf
) == EOF
) {
762 DEBUG(("dbmclose: fclose(bufpagf) failed\n"));
766 free((char *)corepag
);
768 if (fclose(basef
) == EOF
) {
769 DEBUG(("dbmclose: fclose(basef) failed\n"));
772 if (basefname
!= NULL
)
776 if (fclose(dirf
) == EOF
) {
777 DEBUG(("dbmclose: fclose(dirf) failed\n"));
781 DEBUG(("dbmclose: %s\n", (ret
== 0) ? "succeeded" : "failed"));
786 - dbzsync - push all in-core data out to disk
791 register int ret
= 0;
794 DEBUG(("dbzsync: not opened!\n"));
800 if (corepag
!= NULL
) {
801 if (putcore(corepag
, bufpagf
) < 0) {
802 DEBUG(("dbzsync: putcore failed\n"));
807 if (putconf(dirf
, &conf
) < 0)
810 DEBUG(("dbzsync: %s\n", (ret
== 0) ? "succeeded" : "failed"));
815 - dbzcancel - cancel writing of in-core data
816 * Mostly for use from child processes.
817 * Note that we don't need to futz around with stdio buffers, because we
818 * always fflush them immediately anyway and so they never have stale data.
824 DEBUG(("dbzcancel: not opened!\n"));
833 - dbzfetch - fetch() with case mapping built in
839 char buffer
[DBZMAXKEY
+ 1];
841 register size_t keysize
;
843 DEBUG(("dbzfetch: (%s)\n", key
.dptr
));
845 /* Key is supposed to be less than DBZMAXKEY */
847 if (keysize
>= DBZMAXKEY
) {
849 DEBUG(("keysize is %d - truncated to %d\n", key
.dsize
, DBZMAXKEY
));
852 mappedkey
.dptr
= mapcase(buffer
, key
.dptr
, keysize
);
853 buffer
[keysize
] = '\0'; /* just a debug aid */
854 mappedkey
.dsize
= keysize
;
856 return(fetch(mappedkey
));
860 - fetch - get an entry from the database
862 * Disgusting fine point, in the name of backward compatibility: if the
863 * last character of "key" is a NUL, that character is (effectively) not
864 * part of the comparison against the stored keys.
866 datum
/* dptr NULL, dsize 0 means failure */
870 char buffer
[DBZMAXKEY
+ 1];
871 static off_t key_ptr
; /* return value points here */
873 register size_t keysize
;
874 register size_t cmplen
;
877 DEBUG(("fetch: (%s)\n", key
.dptr
));
882 /* Key is supposed to be less than DBZMAXKEY */
884 if (keysize
>= DBZMAXKEY
) {
886 DEBUG(("keysize is %d - truncated to %d\n", key
.dsize
, DBZMAXKEY
));
890 DEBUG(("fetch: database not open!\n"));
892 } else if (basef
== NULL
) { /* basef didn't exist yet */
899 sepp
= &conf
.fieldsep
;
900 if (key
.dptr
[keysize
-1] == '\0') {
902 sepp
= &buffer
[keysize
-1];
904 start(&srch
, &key
, FRESH
);
905 while ((key_ptr
= search(&srch
)) != NOTFOUND
) {
906 DEBUG(("got 0x%lx\n", key_ptr
));
909 if (fseek(basef
, key_ptr
, SEEK_SET
) != 0) {
910 DEBUG(("fetch: seek failed\n"));
913 if (fread(buffer
, 1, keysize
, basef
) != keysize
) {
914 DEBUG(("fetch: read failed\n"));
919 buffer
[keysize
] = '\0'; /* terminated for DEBUG */
920 (void) mapcase(buffer
, buffer
, keysize
);
921 DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n",
922 buffer
, key
.dptr
, keysize
));
923 if (memcmp(key
.dptr
, buffer
, cmplen
) == 0 &&
924 (*sepp
== conf
.fieldsep
|| *sepp
== '\0')) {
926 output
.dptr
= (char *)&key_ptr
;
928 DEBUG(("fetch: successful\n"));
933 /* we didn't find it */
934 DEBUG(("fetch: failed\n"));
935 prevp
= &srch
; /* remember where we stopped */
940 - latebase - try to open a base file that wasn't there at the start
947 if (basefname
== NULL
) {
948 DEBUG(("latebase: name foulup\n"));
951 it
= fopen(basefname
, "r");
953 DEBUG(("latebase: still can't open base\n"));
955 DEBUG(("latebase: late open succeeded\n"));
959 (void) setvbuf(it
, basebuf
, _IOFBF
, sizeof(basebuf
));
966 - dbzstore - store() with case mapping built in
973 char buffer
[DBZMAXKEY
+ 1];
975 register size_t keysize
;
977 DEBUG(("dbzstore: (%s)\n", key
.dptr
));
979 /* Key is supposed to be less than DBZMAXKEY */
981 if (keysize
>= DBZMAXKEY
) {
982 DEBUG(("dbzstore: key size too big (%d)\n", key
.dsize
));
986 mappedkey
.dptr
= mapcase(buffer
, key
.dptr
, keysize
);
987 buffer
[keysize
] = '\0'; /* just a debug aid */
988 mappedkey
.dsize
= keysize
;
990 return(store(mappedkey
, data
));
994 - store - add an entry to the database
996 int /* 0 success, -1 failure */
1004 DEBUG(("store: database not open!\n"));
1006 } else if (basef
== NULL
) { /* basef didn't exist yet */
1012 DEBUG(("store: database open read-only\n"));
1015 if (data
.dsize
!= SOF
) {
1016 DEBUG(("store: value size wrong (%d)\n", data
.dsize
));
1019 if (key
.dsize
>= DBZMAXKEY
) {
1020 DEBUG(("store: key size too big (%d)\n", key
.dsize
));
1024 /* copy the value in to ensure alignment */
1025 (void) memcpy((char *)&value
, data
.dptr
, SOF
);
1026 DEBUG(("store: (%s, %ld)\n", key
.dptr
, (long)value
));
1027 if (!okayvalue(value
)) {
1028 DEBUG(("store: reserved bit or overflow in 0x%lx\n", value
));
1032 /* find the place, exploiting previous search if possible */
1033 start(&srch
, &key
, prevp
);
1034 while (search(&srch
) != NOTFOUND
)
1039 DEBUG(("store: used count %ld\n", conf
.used
[0]));
1041 return(set(&srch
, value
));
1045 - dbzincore - control attempts to keep .pag file in core
1047 int /* old setting */
1051 register int old
= incore
;
1058 - getconf - get configuration from .dir file
1060 static int /* 0 success, -1 failure */
1062 register FILE *df
; /* NULL means just give me the default */
1063 register FILE *pf
; /* NULL means don't care about .pag */
1064 register struct dbzconfig
*cp
;
1070 c
= (df
!= NULL
) ? getc(df
) : EOF
;
1071 if (c
== EOF
) { /* empty file, no configuration known */
1073 if (df
!= NULL
&& pf
!= NULL
&& getc(pf
) != EOF
)
1075 cp
->tsize
= DEFSIZE
;
1076 cp
->fieldsep
= '\t';
1077 for (i
= 0; i
< NUSEDS
; i
++)
1079 cp
->valuesize
= SOF
;
1080 mybytemap(cp
->bytemap
);
1081 cp
->casemap
= DEFCASE
;
1082 cp
->tagenb
= TAGENB
;
1083 cp
->tagmask
= TAGMASK
;
1084 cp
->tagshift
= TAGSHIFT
;
1085 DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
1086 cp
->tsize
, cp
->casemap
, cp
->tagenb
,
1087 cp
->tagmask
, cp
->tagshift
));
1090 (void) ungetc(c
, df
);
1092 /* first line, the vital stuff */
1093 if (getc(df
) != 'd' || getc(df
) != 'b' || getc(df
) != 'z')
1095 if (getno(df
, &err
) != dbzversion
)
1097 cp
->tsize
= getno(df
, &err
);
1098 cp
->fieldsep
= getno(df
, &err
);
1099 while ((c
= getc(df
)) == ' ')
1102 cp
->tagenb
= getno(df
, &err
);
1103 cp
->tagmask
= getno(df
, &err
);
1104 cp
->tagshift
= getno(df
, &err
);
1105 cp
->valuesize
= getno(df
, &err
);
1106 if (cp
->valuesize
!= SOF
) {
1107 DEBUG(("getconf: wrong off_t size (%d)\n", cp
->valuesize
));
1109 cp
->valuesize
= SOF
; /* to protect the loops below */
1111 for (i
= 0; i
< cp
->valuesize
; i
++)
1112 cp
->bytemap
[i
] = getno(df
, &err
);
1113 if (getc(df
) != '\n')
1115 DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp
->tsize
,
1116 cp
->fieldsep
, cp
->casemap
, cp
->tagenb
, cp
->tagmask
,
1118 DEBUG(("bytemap (%d)", cp
->valuesize
));
1119 for (i
= 0; i
< cp
->valuesize
; i
++) {
1120 DEBUG((" %d", cp
->bytemap
[i
]));
1124 /* second line, the usages */
1125 for (i
= 0; i
< NUSEDS
; i
++)
1126 cp
->used
[i
] = getno(df
, &err
);
1127 if (getc(df
) != '\n')
1129 DEBUG(("used %ld %ld %ld...\n", cp
->used
[0], cp
->used
[1], cp
->used
[2]));
1132 DEBUG(("getconf error\n"));
1139 - getno - get a long
1151 while ((c
= getc(f
)) == ' ')
1153 if (c
== EOF
|| c
== '\n') {
1154 DEBUG(("getno: missing number\n"));
1160 while ((c
= getc(f
)) != EOF
&& c
!= '\n' && c
!= ' ')
1161 if (p
< &getbuf
[MAXN
-1])
1164 DEBUG(("getno: EOF\n"));
1167 (void) ungetc(c
, f
);
1170 if (strspn(getbuf
, "-1234567890") != strlen(getbuf
)) {
1171 DEBUG(("getno: `%s' non-numeric\n", getbuf
));
1174 return(atol(getbuf
));
1178 - putconf - write configuration to .dir file
1180 static int /* 0 success, -1 failure */
1183 register struct dbzconfig
*cp
;
1186 register int ret
= 0;
1188 if (fseek(f
, 0, SEEK_SET
) != 0) {
1189 DEBUG(("fseek failure in putconf\n"));
1192 fprintf(f
, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion
,
1194 cp
->fieldsep
, cp
->casemap
, (long)cp
->tagenb
,
1195 (long)cp
->tagmask
, cp
->tagshift
,
1198 for (i
= 0; i
< cp
->valuesize
; i
++)
1199 fprintf(f
, " %d", cp
->bytemap
[i
]);
1201 for (i
= 0; i
< NUSEDS
; i
++)
1203 (long)cp
->used
[i
], (i
< NUSEDS
-1) ? ' ' : '\n');
1210 DEBUG(("putconf status %d\n", ret
));
1215 - getcore - try to set up an in-core copy of .pag file
1217 static off_t
* /* pointer to copy, or NULL */
1223 register size_t nread
;
1226 it
= malloc((size_t)conf
.tsize
* SOF
);
1228 DEBUG(("getcore: malloc failed\n"));
1232 nread
= fread(it
, SOF
, (size_t)conf
.tsize
, f
);
1234 DEBUG(("getcore: read failed\n"));
1239 p
= (off_t
*)it
+ nread
;
1240 i
= (size_t)conf
.tsize
- nread
;
1243 return((off_t
*)it
);
1247 - putcore - try to rewrite an in-core table
1249 static int /* 0 okay, -1 fail */
1254 if (fseek(f
, 0, SEEK_SET
) != 0) {
1255 DEBUG(("fseek failure in putcore\n"));
1258 (void) fwrite((char *)tab
, SOF
, (size_t)conf
.tsize
, f
);
1260 return((ferror(f
)) ? -1 : 0);
1264 - start - set up to start or restart a search
1268 register struct searcher
*sp
;
1270 register struct searcher
*osp
; /* may be FRESH, i.e. NULL */
1274 h
= hash(kp
->dptr
, kp
->dsize
);
1275 if (osp
!= FRESH
&& osp
->hash
== h
) {
1278 DEBUG(("search restarted\n"));
1281 sp
->tag
= MKTAG(h
/ conf
.tsize
);
1282 DEBUG(("tag 0x%lx\n", sp
->tag
));
1283 sp
->place
= h
% conf
.tsize
;
1285 sp
->run
= (conf
.olddbz
) ? conf
.tsize
: MAXRUN
;
1292 - search - conduct part of a search
1294 static off_t
/* NOTFOUND if we hit VACANT or error */
1296 register struct searcher
*sp
;
1298 register off_t dest
;
1299 register off_t value
;
1300 off_t val
; /* buffer for value (can't fread register) */
1301 register off_t place
;
1307 /* determine location to be examined */
1310 /* go to next location */
1311 if (--sp
->run
<= 0) {
1315 place
= (place
+1)%conf
.tsize
+ sp
->tabno
*conf
.tsize
;
1318 sp
->seen
= 1; /* now looking at current location */
1319 DEBUG(("search @ %ld\n", place
));
1321 /* get the tagged value */
1322 if (corepag
!= NULL
&& place
< conf
.tsize
) {
1323 DEBUG(("search: in core\n"));
1324 value
= MAPIN(corepag
[place
]);
1326 /* seek, if necessary */
1328 if (pagpos
!= dest
) {
1329 if (fseek(pagf
, dest
, SEEK_SET
) != 0) {
1330 DEBUG(("search: seek failed\n"));
1339 if (fread((char *)&val
, sizeof(val
), 1, pagf
) == 1)
1341 else if (ferror(pagf
)) {
1342 DEBUG(("search: read failed\n"));
1350 pagpos
+= sizeof(val
);
1353 /* vacant slot is always cause to return */
1354 if (value
== VACANT
) {
1355 DEBUG(("search: empty slot\n"));
1360 value
= UNBIAS(value
);
1361 DEBUG(("got 0x%lx\n", value
));
1362 if (!HASTAG(value
)) {
1363 DEBUG(("tagless\n"));
1365 } else if (TAG(value
) == sp
->tag
) {
1367 return(NOTAG(value
));
1369 DEBUG(("mismatch 0x%lx\n", TAG(value
)));
1376 - okayvalue - check that a value can be stored
1378 static int /* predicate */
1385 if (value
== LONG_MAX
) /* BIAS() and UNBIAS() will overflow */
1392 - set - store a value into a location previously found by search
1394 static int /* 0 success, -1 failure */
1396 register struct searcher
*sp
;
1399 register off_t place
= sp
->place
;
1400 register off_t v
= value
;
1405 if (CANTAG(v
) && !conf
.olddbz
) {
1406 v
|= sp
->tag
| taghere
;
1407 if (v
!= UNBIAS(VACANT
)) /* BIAS(v) won't look VACANT */
1409 if (v
!= LONG_MAX
) /* and it won't overflow */
1413 DEBUG(("tagged value is 0x%lx\n", value
));
1414 value
= BIAS(value
);
1415 value
= MAPOUT(value
);
1417 /* If we have the index file in memory, use it */
1418 if (corepag
!= NULL
&& place
< conf
.tsize
) {
1419 corepag
[place
] = value
;
1420 DEBUG(("set: incore\n"));
1425 pagpos
= -1; /* invalidate position memory */
1426 if (fseek(pagf
, place
* SOF
, SEEK_SET
) != 0) {
1427 DEBUG(("set: seek failed\n"));
1433 if (fwrite((char *)&value
, SOF
, 1, pagf
) != 1) {
1434 DEBUG(("set: write failed\n"));
1438 /* fflush improves robustness, and buffer re-use is rare anyway */
1439 if (fflush(pagf
) == EOF
) {
1440 DEBUG(("set: fflush failed\n"));
1445 DEBUG(("set: succeeded\n"));
1450 - mybytemap - determine this machine's byte map
1452 * A byte map is an array of ints, sizeof(off_t) of them. The 0th int
1453 * is the byte number of the high-order byte in my off_t, and so forth.
1457 int map
[]; /* -> int[SOF] */
1463 register int *mp
= &map
[SOF
];
1468 for (ntodo
= (int)SOF
; ntodo
> 0; ntodo
--) {
1469 for (i
= 0; i
< SOF
; i
++)
1473 /* trouble -- set it to *something* consistent */
1474 DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo
));
1475 for (i
= 0; i
< SOF
; i
++)
1479 DEBUG(("mybytemap: byte %d\n", i
));
1487 - bytemap - transform an off_t from byte ordering map1 to map2
1489 static off_t
/* transformed result */
1490 bytemap(ino
, map1
, map2
)
1504 for (i
= 0; i
< SOF
; i
++)
1505 out
.c
[map2
[i
]] = in
.c
[map1
[i
]];
1510 * This is a simplified version of the pathalias hashing function.
1511 * Thanks to Steve Belovin and Peter Honeyman
1513 * hash a string into a long int. 31 bit crc (from andrew appel).
1514 * the crc table is computed at run time by crcinit() -- we could
1515 * precompute, but it takes 1 clock tick on a 750.
1517 * This fast table calculation works only if POLY is a prime polynomial
1518 * in the field of integers modulo 2. Since the coefficients of a
1519 * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
1520 * implicit. IT MUST ALSO BE THE CASE that the coefficients of orders
1521 * 31 down to 25 are zero. Happily, we have candidates, from
1522 * E. J. Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
1523 * x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
1526 * We reverse the bits to get:
1527 * 111101010000000000000000000000001 but drop the last 1
1529 * 010010000000000000000000000000001 ditto, for 31-bit crc
1533 #define POLY 0x48000000L /* 31-bit polynomial (avoids sign problems) */
1535 static long CrcTable
[128];
1538 - crcinit - initialize tables for hash function
1546 for (i
= 0; i
< 128; ++i
) {
1548 for (j
= 7 - 1; j
>= 0; --j
)
1553 DEBUG(("crcinit: done\n"));
1557 - hash - Honeyman's nice hashing function
1561 register char *name
;
1564 register long sum
= 0L;
1567 sum
= (sum
>> 7) ^ CrcTable
[(sum
^ (*name
++)) & 0x7f];
1569 DEBUG(("hash: returns (%ld)\n", sum
));
1574 * case-mapping stuff
1576 * Borrowed from C News, by permission of the authors. Somewhat modified.
1578 * We exploit the fact that we are dealing only with headers here, and
1579 * headers are limited to the ASCII characters by RFC822. It is barely
1580 * possible that we might be dealing with a translation into another
1581 * character set, but in particular it's very unlikely for a header
1582 * character to be outside -128..255.
1584 * Life would be a whole lot simpler if tolower() could safely and portably
1585 * be applied to any char.
1588 #define OFFSET 128 /* avoid trouble with negative chars */
1590 /* must call casencmp before invoking TOLOW... */
1591 #define TOLOW(c) (cmap[(c)+OFFSET])
1593 /* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
1594 /* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
1595 #define CISTREQN(a, b, n) \
1596 (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)
1598 #define MAPSIZE (256+OFFSET)
1599 static char cmap
[MAPSIZE
]; /* relies on init to '\0' */
1600 static int mprimed
= 0; /* has cmap been set up? */
1603 - mapprime - set up case-mapping stuff
1612 static char lower
[] = "abcdefghijklmnopqrstuvwxyz";
1613 static char upper
[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1615 for (lp
= lower
, up
= upper
; *lp
!= '\0'; lp
++, up
++) {
1618 cmap
[*up
+OFFSET
] = c
;
1620 for (i
= 0; i
< MAPSIZE
; i
++)
1621 if (cmap
[i
] == '\0')
1622 cmap
[i
] = (char)(i
-OFFSET
);
1627 - casencmp - case-independent strncmp
1629 static int /* < == > 0 */
1630 casencmp(s1
, s2
, len
)
1645 while (--n
>= 0 && *p1
!= '\0' && TOLOW(*p1
) == TOLOW(*p2
)) {
1653 * The following case analysis is necessary so that characters
1654 * which look negative collate low against normal characters but
1655 * high against the end-of-string NUL.
1657 if (*p1
== '\0' && *p2
== '\0')
1659 else if (*p1
== '\0')
1661 else if (*p2
== '\0')
1664 return(TOLOW(*p1
) - TOLOW(*p2
));
1668 - mapcase - do case-mapped copy
1670 static char * /* returns src or dst */
1671 mapcase(dst
, src
, siz
)
1672 char *dst
; /* destination, used only if mapping needed */
1673 char *src
; /* source; src == dst is legal */
1678 register char *c
; /* case break */
1679 register char *e
; /* end of source */
1682 c
= cipoint(src
, siz
);
1701 - cipoint - where in this message-ID does it become case-insensitive?
1703 * The RFC822 code is not quite complete. Absolute, total, full RFC822
1704 * compliance requires a horrible parsing job, because of the arcane
1705 * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
1706 * for example. There are three or four things that might occur in the
1707 * domain part of a message-id that are case-sensitive. They don't seem
1708 * to ever occur in real news, thank Cthulhu. (What? You were expecting
1709 * a merciful and forgiving deity to be invoked in connection with RFC822?
1710 * Forget it; none of them would come near it.)
1712 static char * /* pointer into s, or NULL for "nowhere" */
1718 static char post
[] = "postmaster";
1719 static int plen
= sizeof(post
)-1;
1721 switch (conf
.casemap
) {
1722 case '0': /* unmapped, sensible */
1725 case 'C': /* C News, RFC 822 conformant (approx.) */
1726 p
= memchr(s
, '@', siz
);
1727 if (p
== NULL
) /* no local/domain split */
1728 return(NULL
); /* assume all local */
1729 else if (p
- (s
+1) == plen
&& CISTREQN(s
+1, post
, plen
)) {
1730 /* crazy -- "postmaster" is case-insensitive */
1735 case '=': /* 2.11, neither sensible nor conformant */
1736 return(s
); /* all case-insensitive */
1740 DEBUG(("cipoint: unknown case mapping `%c'\n", conf
.casemap
));
1741 return(NULL
); /* just leave it alone */
1745 - dbzdebug - control dbz debugging at run time
1752 register int old
= debug
;