libio/dbz/dbz.c

   1 /*
   2
   3 dbz.c  V3.2
   4
   5 Copyright 1988 Jon Zeeff (zeeff@b-tech.ann-arbor.mi.us)
   6 You can use this code in any manner, as long as you leave my name on it
   7 and don't hold me responsible for any problems with it.
   8
   9 Hacked on by gdb@ninja.UUCP (David Butler); Sun Jun  5 00:27:08 CDT 1988
  10
   11 Various improvements + INCORE by moraes@ai.toronto.edu (Mark Moraes)
  12
  13 Major reworking by Henry Spencer as part of the C News project.
  14
  15 These routines replace dbm as used by the usenet news software
  16 (it's not a full dbm replacement by any means).  It's fast and
  17 simple.  It contains no AT&T code.
  18
  19 In general, dbz's files are 1/20 the size of dbm's.  Lookup performance
  20 is somewhat better, while file creation is spectacularly faster, especially
  21 if the incore facility is used.
  22
  23 */
  24
  25 #include <stdio.h>
  26 #include <sys/types.h>
  27 #include <string.h>
  28 #include <ctype.h>
  29 #include <errno.h>
  30 #include <stdlib.h>
  31 #include <dbz.h>
  32
  33 /*
  34  * #ifdef index.  "LIA" = "leave it alone unless you know what you're doing".
  35  *
  36  * FUNNYSEEKS   SEEK_SET is not 0, get it from <unistd.h>
  37  * INDEX_SIZE   backward compatibility with old dbz; avoid using this
  38  * NMEMORY      number of days of memory for use in sizing new table (LIA)
  39  * INCORE       backward compatibility with old dbz; use dbzincore() instead
  40  * DBZDEBUG     enable debugging
  41  * DEFSIZE      default table size (not as critical as in old dbz)
  42  * OLDBNEWS     default case mapping as in old B News; set NOBUFFER
  43  * BNEWS        default case mapping as in current B News; set NOBUFFER
  44  * DEFCASE      default case-map algorithm selector
  45  * NOTAGS       fseek offsets are strange, do not do tagging (see below)
  46  * NPAGBUF      size of .pag buffer, in longs (LIA)
  47  * SHISTBUF     size of ASCII-file buffer, in bytes (LIA)
  48  * MAXRUN       length of run which shifts to next table (see below) (LIA)
  49  * OVERFLOW     long-int arithmetic overflow must be avoided, will trap
  50  * NOBUFFER     do not buffer hash-table i/o, B News locking is defective
  51  */
  52
  53 #ifdef FUNNYSEEKS
  54 #include <unistd.h>
  55 #else
  56 #define SEEK_SET        0
  57 #endif
  58 #ifdef OVERFLOW
  59 #include <limits.h>
  60 #endif
  61
  62 static int dbzversion = 3;      /* for validating .dir file format */
  63
  64 /*
  65  * The dbz database exploits the fact that when news stores a <key,value>
  66  * tuple, the `value' part is a seek offset into a text file, pointing to
  67  * a copy of the `key' part.  This avoids the need to store a copy of
  68  * the key in the dbz files.  However, the text file *must* exist and be
  69  * consistent with the dbz files, or things will fail.
  70  *
  71  * The basic format of the database is a simple hash table containing the
  72  * values.  A value is stored by indexing into the table using a hash value
  73  * computed from the key; collisions are resolved by linear probing (just
  74  * search forward for an empty slot, wrapping around to the beginning of
  75  * the table if necessary).  Linear probing is a performance disaster when
  76  * the table starts to get full, so a complication is introduced.  The
  77  * database is actually one *or more* tables, stored sequentially in the
  78  * .pag file, and the length of linear-probe sequences is limited.  The
  79  * search (for an existing item or an empty slot) always starts in the
  80  * first table, and whenever MAXRUN probes have been done in table N,
  81  * probing continues in table N+1.  This behaves reasonably well even in
  82  * cases of massive overflow.  There are some other small complications
  83  * added, see comments below.
  84  *
  85  * The table size is fixed for any particular database, but is determined
  86  * dynamically when a database is rebuilt.  The strategy is to try to pick
  87  * the size so the first table will be no more than 2/3 full, that being
  88  * slightly before the point where performance starts to degrade.  (It is
  89  * desirable to be a bit conservative because the overflow strategy tends
  90  * to produce files with holes in them, which is a nuisance.)
  91  */
  92
  93 /*
  94  * The following is for backward compatibility.
  95  */
  96 #ifdef INDEX_SIZE
  97 #define DEFSIZE INDEX_SIZE
  98 #endif
  99
 100 /*
 101  * ANSI C says the offset argument to fseek is a long, not an off_t, for some
 102  * reason.  Let's use off_t anyway.
 103  */
 104 #define SOF     (sizeof(off_t))
 105
 106 /*
 107  * We assume that unused areas of a binary file are zeros, and that the
 108  * bit pattern of `(off_t)0' is all zeros.  The alternative is rather
 109  * painful file initialization.  Note that okayvalue(), if OVERFLOW is
 110  * defined, knows what value of an offset would cause overflow.
 111  */
 112 #define VACANT          ((off_t)0)
 113 #define BIAS(o)         ((o)+1)         /* make any valid off_t non-VACANT */
 114 #define UNBIAS(o)       ((o)-1)         /* reverse BIAS() effect */
 115
 116 /*
 117  * In a Unix implementation, or indeed any in which an off_t is a byte
 118  * count, there are a bunch of high bits free in an off_t.  There is a
 119  * use for them.  Checking a possible hit by looking it up in the base
 120  * file is relatively expensive, and the cost can be dramatically reduced
 121  * by using some of those high bits to tag the value with a few more bits
 122  * of the key's hash.  This detects most false hits without the overhead of
 123  * seek+read+strcmp.  We use the top bit to indicate whether the value is
 124  * tagged or not, and don't tag a value which is using the tag bits itself.
 125  * We're in trouble if the off_t representation wants to use the top bit.
 126  * The actual bitmasks and offset come from the configuration stuff,
 127  * which permits fiddling with them as necessary, and also suppressing
 128  * them completely (by defining the masks to 0).  We build pre-shifted
 129  * versions of the masks for efficiency.
 130  */
 131 static off_t tagbits;           /* pre-shifted tag mask */
 132 static off_t taghere;           /* pre-shifted tag-enable bit */
 133 static off_t tagboth;           /* tagbits|taghere */
 134 #define HASTAG(o)       ((o)&taghere)
 135 #define TAG(o)          ((o)&tagbits)
 136 #define NOTAG(o)        ((o)&~tagboth)
 137 #define CANTAG(o)       (((o)&tagboth) == 0)
 138 #define MKTAG(v)        (((v)<<conf.tagshift)&tagbits)
 139
 140 /*
 141  * A new, from-scratch database, not built as a rebuild of an old one,
 142  * needs to know table size, casemap algorithm, and tagging.  Normally
 143  * the user supplies this info, but there have to be defaults.
 144  */
 145 #ifndef DEFSIZE
 146 #define DEFSIZE 120011          /* 300007 might be better */
 147 #endif
 148 #ifdef OLDBNEWS
 149 #define DEFCASE '0'             /* B2.10 -- no mapping */
 150 #define NOBUFFER                /* B News locking is defective */
 151 #endif
 152 #ifdef BNEWS
 153 #define DEFCASE '='             /* B2.11 -- all mapped */
 154 #define NOBUFFER                /* B News locking is defective */
 155 #endif
 156 #ifndef DEFCASE                 /* C News compatibility is the default */
 157 #define DEFCASE 'C'             /* C News -- RFC822 mapping */
 158 #endif
 159 #ifndef NOTAGS
 160 #define TAGENB  0x80            /* tag enable is top bit, tag is next 7 */
 161 #define TAGMASK 0x7f
 162 #define TAGSHIFT        24
 163 #else
 164 #define TAGENB  0               /* no tags */
 165 #define TAGMASK 0
 166 #define TAGSHIFT        0
 167 #endif
 168
 169 /*
 170  * We read configuration info from the .dir file into this structure,
 171  * so we can avoid wired-in assumptions for an existing database.
 172  *
 173  * Among the info is a record of recent peak usages, so that a new table
 174  * size can be chosen intelligently when rebuilding.  10 is a good
 175  * number of usages to keep, since news displays marked fluctuations
 176  * in volume on a 7-day cycle.
 177  */
 178 struct dbzconfig {
 179         int olddbz;             /* .dir file empty but .pag not? */
 180         off_t tsize;            /* table size */
 181 #       ifndef NMEMORY
 182 #       define  NMEMORY 10      /* # days of use info to remember */
 183 #       endif
 184 #       define  NUSEDS  (1+NMEMORY)
 185         off_t used[NUSEDS];     /* entries used today, yesterday, ... */
 186         int valuesize;          /* size of table values, == SOF */
 187         int bytemap[SOF];       /* byte-order map */
 188         char casemap;           /* case-mapping algorithm (see cipoint()) */
 189         char fieldsep;          /* field separator in base file, if any */
 190         off_t tagenb;           /* unshifted tag-enable bit */
 191         off_t tagmask;          /* unshifted tag mask */
 192         int tagshift;           /* shift count for tagmask and tagenb */
 193 };
 194 static struct dbzconfig conf;
 195 static int getconf();
 196 static long getno();
 197 static int putconf();
 198 static void mybytemap();
 199 static off_t bytemap();
 200
 201 /*
 202  * For a program that makes many, many references to the database, it
 203  * is a large performance win to keep the table in core, if it will fit.
 204  * Note that this does hurt robustness in the event of crashes, and
 205  * dbmclose() *must* be called to flush the in-core database to disk.
 206  * The code is prepared to deal with the possibility that there isn't
 207  * enough memory.  There *is* an assumption that a size_t is big enough
 208  * to hold the size (in bytes) of one table, so dbminit() tries to figure
 209  * out whether this is possible first.
 210  *
 211  * The preferred way to ask for an in-core table is to do dbzincore(1)
 212  * before dbminit().  The default is not to do it, although -DINCORE
 213  * overrides this for backward compatibility with old dbz.
 214  *
 215  * We keep only the first table in core.  This greatly simplifies the
 216  * code, and bounds memory demand.  Furthermore, doing this is a large
 217  * performance win even in the event of massive overflow.
 218  */
 219 #ifdef INCORE
 220 static int incore = 1;
 221 #else
 222 static int incore = 0;
 223 #endif
 224
 225 /*
 226  * Stdio buffer for .pag reads.  Buffering more than about 16 does not help
 227  * significantly at the densities we try to maintain, and the much larger
 228  * buffers that most stdios default to are much more expensive to fill.
 229  * With small buffers, stdio is performance-competitive with raw read(),
 230  * and it's much more portable.
 231  */
 232 #ifndef NPAGBUF
 233 #define NPAGBUF 16
 234 #endif
 235 #ifndef NOBUFFER
 236 #ifdef _IOFBF
 237 static off_t pagbuf[NPAGBUF];   /* only needed if !NOBUFFER && _IOFBF */
 238 #endif
 239 #endif
 240
 241 /*
 242  * Stdio buffer for base-file reads.  Message-IDs (all news ever needs to
 243  * read) are essentially never longer than 64 bytes, and the typical stdio
 244  * buffer is so much larger that it is much more expensive to fill.
 245  */
 246 #ifndef SHISTBUF
 247 #define SHISTBUF        64
 248 #endif
 249 #ifdef _IOFBF
 250 static char basebuf[SHISTBUF];          /* only needed if _IOFBF exists */
 251 #endif
 252
 253 /*
 254  * Data structure for recording info about searches.
 255  */
 256 struct searcher {
 257         off_t place;            /* current location in file */
 258         int tabno;              /* which table we're in */
 259         int run;                /* how long we'll stay in this table */
 260 #               ifndef MAXRUN
 261 #               define  MAXRUN  100
 262 #               endif
 263         long hash;              /* the key's hash code (for optimization) */
 264         off_t tag;              /* tag we are looking for */
 265         int seen;               /* have we examined current location? */
 266         int aborted;            /* has i/o error aborted search? */
 267 };
 268 static void start();
 269 #define FRESH   ((struct searcher *)NULL)
 270 static off_t search();
 271 #define NOTFOUND        ((off_t)-1)
 272 static int okayvalue();
 273 static int set();
 274
 275 /*
 276  * Arguably the searcher struct for a given routine ought to be local to
 277  * it, but a fetch() is very often immediately followed by a store(), and
 278  * in some circumstances it is a useful performance win to remember where
 279  * the fetch() completed.  So we use a global struct and remember whether
 280  * it is current.
 281  */
 282 static struct searcher srch;
 283 static struct searcher *prevp;  /* &srch or FRESH */
 284
 285 /* byte-ordering stuff */
 286 static int mybmap[SOF];                 /* my byte order (see mybytemap()) */
 287 static int bytesame;                    /* is database order same as mine? */
 288 #define MAPIN(o)        ((bytesame) ? (o) : bytemap((o), conf.bytemap, mybmap))
 289 #define MAPOUT(o)       ((bytesame) ? (o) : bytemap((o), mybmap, conf.bytemap))
 290
 291 /*
 292  * The double parentheses needed to make this work are ugly, but the
 293  * alternative (under most compilers) is to pack around 2K of unused
 294  * strings -- there's just no way to get rid of them.
 295  */
 296 static int debug;                       /* controlled by dbzdebug() */
 297 #ifdef DBZDEBUG
 298 #define DEBUG(args) if (debug) { (void) printf args ; }
 299 #else
 300 #define DEBUG(args)     ;
 301 #endif
 302
 303 /* misc. forwards */
 304 static long hash();
 305 static void crcinit();
 306 static char *cipoint();
 307 static char *mapcase();
 308 static int isprime();
 309 static FILE *latebase();
 310
 311 /* file-naming stuff */
 312 static char dir[] = ".dir";
 313 static char pag[] = ".pag";
 314 static char *enstring();
 315
 316 /* central data structures */
 317 static FILE *basef;             /* descriptor for base file */
 318 static char *basefname;         /* name for not-yet-opened base file */
 319 static FILE *dirf;              /* descriptor for .dir file */
 320 static int dirronly;            /* dirf open read-only? */
 321 static FILE *pagf = NULL;       /* descriptor for .pag file */
 322 static off_t pagpos;            /* posn in pagf; only search may set != -1 */
 323 static int pagronly;            /* pagf open read-only? */
 324 static off_t *corepag;          /* incore version of .pag file, if any */
 325 static FILE *bufpagf;           /* well-buffered pagf, for incore rewrite */
 326 static off_t *getcore();
 327 static int putcore();
 328 static int written;             /* has a store() been done? */
 329
 330 /*
 331  - dbzfresh - set up a new database, no historical info
 332  */
 333 int                             /* 0 success, -1 failure */
 334 dbzfresh(name, size, fs, cmap, tagmask)
 335 char *name;                     /* base name; .dir and .pag must exist */
 336 long size;                      /* table size (0 means default) */
 337 int fs;                         /* field-separator character in base file */
 338 int cmap;                       /* case-map algorithm (0 means default) */
 339 off_t tagmask;                  /* 0 default, 1 no tags */
 340 {
 341         register char *fn;
 342         struct dbzconfig c;
 343         register off_t m;
 344         register FILE *f;
 345
 346         if (pagf != NULL) {
 347                 DEBUG(("dbzfresh: database already open\n"));
 348                 return(-1);
 349         }
 350         if (size != 0 && size < 2) {
 351                 DEBUG(("dbzfresh: preposterous size (%ld)\n", size));
 352                 return(-1);
 353         }
 354
 355         /* get default configuration */
 356         if (getconf((FILE *)NULL, (FILE *)NULL, &c) < 0)
 357                 return(-1);     /* "can't happen" */
 358
 359         /* and mess with it as specified */
 360         if (size != 0)
 361                 c.tsize = size;
 362         c.fieldsep = fs;
 363         switch (cmap) {
 364         case 0:
 365         case '0':
 366         case 'B':               /* 2.10 compat */
 367                 c.casemap = '0';        /* '\0' nicer, but '0' printable! */
 368                 break;
 369         case '=':
 370         case 'b':               /* 2.11 compat */
 371                 c.casemap = '=';
 372                 break;
 373         case 'C':
 374                 c.casemap = 'C';
 375                 break;
 376         case '?':
 377                 c.casemap = DEFCASE;
 378                 break;
 379         default:
 380                 DEBUG(("dbzfresh case map `%c' unknown\n", cmap));
 381                 return(-1);
 382                 break;
 383         }
 384         switch (tagmask) {
 385         case 0:                 /* default */
 386                 break;
 387         case 1:                 /* no tags */
 388                 c.tagshift = 0;
 389                 c.tagmask = 0;
 390                 c.tagenb = 0;
 391                 break;
 392         default:
 393                 m = tagmask;
 394                 c.tagshift = 0;
 395                 while (!(m&01)) {
 396                         m >>= 1;
 397                         c.tagshift++;
 398                 }
 399                 c.tagmask = m;
 400                 c.tagenb = (m << 1) & ~m;
 401                 break;
 402         }
 403
 404         /* write it out */
 405         fn = enstring(name, dir);
 406         if (fn == NULL)
 407                 return(-1);
 408         f = fopen(fn, "w");
 409         free(fn);
 410         if (f == NULL) {
 411                 DEBUG(("dbzfresh: unable to write config\n"));
 412                 return(-1);
 413         }
 414         if (putconf(f, &c) < 0) {
 415                 (void) fclose(f);
 416                 return(-1);
 417         }
 418         if (fclose(f) == EOF) {
 419                 DEBUG(("dbzfresh: fclose failure\n"));
 420                 return(-1);
 421         }
 422
 423         /* create/truncate .pag */
 424         fn = enstring(name, pag);
 425         if (fn == NULL)
 426                 return(-1);
 427         f = fopen(fn, "w");
 428         free(fn);
 429         if (f == NULL) {
 430                 DEBUG(("dbzfresh: unable to create/truncate .pag file\n"));
 431                 return(-1);
 432         } else
 433                 (void) fclose(f);
 434
 435         /* and punt to dbminit for the hard work */
 436         return(dbminit(name));
 437 }
 438
 439 /*
 440  - dbzsize - what's a good table size to hold this many entries?
 441  */
 442 long
 443 dbzsize(contents)
 444 long contents;                  /* 0 means what's the default */
 445 {
 446         register long n;
 447
 448         if (contents <= 0) {    /* foulup or default inquiry */
 449                 DEBUG(("dbzsize: preposterous input (%ld)\n", contents));
 450                 return(DEFSIZE);
 451         }
 452         n = (contents/2)*3;     /* try to keep table at most 2/3 full */
 453         if (!(n&01))            /* make it odd */
 454                 n++;
 455         DEBUG(("dbzsize: tentative size %ld\n", n));
 456         while (!isprime(n))     /* and look for a prime */
 457                 n += 2;
 458         DEBUG(("dbzsize: final size %ld\n", n));
 459
 460         return(n);
 461 }
 462
 463 /*
 464  - isprime - is a number prime?
 465  *
 466  * This is not a terribly efficient approach.
 467  */
 468 static int                      /* predicate */
 469 isprime(x)
 470 register long x;
 471 {
 472         static int quick[] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 0 };
 473         register int *ip;
 474         register long div;
 475         register long stop;
 476
 477         /* hit the first few primes quickly to eliminate easy ones */
 478         /* this incidentally prevents ridiculously small tables */
 479         for (ip = quick; (div = *ip) != 0; ip++)
 480                 if (x%div == 0) {
 481                         DEBUG(("isprime: quick result on %ld\n", (long)x));
 482                         return(0);
 483                 }
 484
 485         /* approximate square root of x */
 486         for (stop = x; x/stop < stop; stop >>= 1)
 487                 continue;
 488         stop <<= 1;
 489
 490         /* try odd numbers up to stop */
 491         for (div = *--ip; div < stop; div += 2)
 492                 if (x%div == 0)
 493                         return(0);
 494
 495         return(1);
 496 }
 497
 498 /*
 499  - dbzagain - set up a new database to be a rebuild of an old one
 500  */
 501 int                             /* 0 success, -1 failure */
 502 dbzagain(name, oldname)
 503 char *name;                     /* base name; .dir and .pag must exist */
 504 char *oldname;                  /* base name; all must exist */
 505 {
 506         register char *fn;
 507         struct dbzconfig c;
 508         register int i;
 509         register long top;
 510         register FILE *f;
 511         register int newtable;
 512         register off_t newsize;
 513
 514         if (pagf != NULL) {
 515                 DEBUG(("dbzagain: database already open\n"));
 516                 return(-1);
 517         }
 518
 519         /* pick up the old configuration */
 520         fn = enstring(oldname, dir);
 521         if (fn == NULL)
 522                 return(-1);
 523         f = fopen(fn, "r");
 524         free(fn);
 525         if (f == NULL) {
 526                 DEBUG(("dbzagain: cannot open old .dir file\n"));
 527                 return(-1);
 528         }
 529         i = getconf(f, (FILE *)NULL, &c);
 530         (void) fclose(f);
 531         if (i < 0) {
 532                 DEBUG(("dbzagain: getconf failed\n"));
 533                 return(-1);
 534         }
 535
 536         /* tinker with it */
 537         top = 0;
 538         newtable = 0;
 539         for (i = 0; i < NUSEDS; i++) {
 540                 if (top < c.used[i])
 541                         top = c.used[i];
 542                 if (c.used[i] == 0)
 543                         newtable = 1;   /* hasn't got full usage history yet */
 544         }
 545         if (top == 0) {
 546                 DEBUG(("dbzagain: old table has no contents!\n"));
 547                 newtable = 1;
 548         }
 549         for (i = NUSEDS-1; i > 0; i--)
 550                 c.used[i] = c.used[i-1];
 551         c.used[0] = 0;
 552         newsize = dbzsize(top);
 553         if (!newtable || newsize > c.tsize)     /* don't shrink new table */
 554                 c.tsize = newsize;
 555
 556         /* write it out */
 557         fn = enstring(name, dir);
 558         if (fn == NULL)
 559                 return(-1);
 560         f = fopen(fn, "w");
 561         free(fn);
 562         if (f == NULL) {
 563                 DEBUG(("dbzagain: unable to write new .dir\n"));
 564                 return(-1);
 565         }
 566         i = putconf(f, &c);
 567         (void) fclose(f);
 568         if (i < 0) {
 569                 DEBUG(("dbzagain: putconf failed\n"));
 570                 return(-1);
 571         }
 572
 573         /* create/truncate .pag */
 574         fn = enstring(name, pag);
 575         if (fn == NULL)
 576                 return(-1);
 577         f = fopen(fn, "w");
 578         free(fn);
 579         if (f == NULL) {
 580                 DEBUG(("dbzagain: unable to create/truncate .pag file\n"));
 581                 return(-1);
 582         } else
 583                 (void) fclose(f);
 584
 585         /* and let dbminit do the work */
 586         return(dbminit(name));
 587 }
 588
 589 /*
 590  - dbminit - open a database, creating it (using defaults) if necessary
 591  *
 592  * We try to leave errno set plausibly, to the extent that underlying
 593  * functions permit this, since many people consult it if dbminit() fails.
 594  */
 595 int                             /* 0 success, -1 failure */
 596 dbminit(name)
 597 char *name;
 598 {
 599         register int i;
 600         register size_t s;
 601         register char *dirfname;
 602         register char *pagfname;
 603
 604         if (pagf != NULL) {
 605                 DEBUG(("dbminit: dbminit already called once\n"));
 606                 errno = 0;
 607                 return(-1);
 608         }
 609
 610         /* open the .dir file */
 611         dirfname = enstring(name, dir);
 612         if (dirfname == NULL)
 613                 return(-1);
 614         dirf = fopen(dirfname, "r+");
 615         if (dirf == NULL) {
 616                 dirf = fopen(dirfname, "r");
 617                 dirronly = 1;
 618         } else
 619                 dirronly = 0;
 620         free(dirfname);
 621         if (dirf == NULL) {
 622                 DEBUG(("dbminit: can't open .dir file\n"));
 623                 return(-1);
 624         }
 625
 626         /* open the .pag file */
 627         pagfname = enstring(name, pag);
 628         if (pagfname == NULL) {
 629                 (void) fclose(dirf);
 630                 return(-1);
 631         }
 632         pagf = fopen(pagfname, "r+b");
 633         if (pagf == NULL) {
 634                 pagf = fopen(pagfname, "rb");
 635                 if (pagf == NULL) {
 636                         DEBUG(("dbminit: .pag open failed\n"));
 637                         (void) fclose(dirf);
 638                         free(pagfname);
 639                         return(-1);
 640                 }
 641                 pagronly = 1;
 642         } else if (dirronly)
 643                 pagronly = 1;
 644         else
 645                 pagronly = 0;
 646 #ifdef NOBUFFER
 647         /*
 648          * B News does not do adequate locking on its database accesses.
 649          * Why it doesn't get into trouble using dbm is a mystery.  In any
 650          * case, doing unbuffered i/o does not cure the problem, but does
 651          * enormously reduce its incidence.
 652          */
 653         (void) setbuf(pagf, (char *)NULL);
 654 #else
 655 #ifdef _IOFBF
 656         (void) setvbuf(pagf, (char *)pagbuf, _IOFBF, sizeof(pagbuf));
 657 #endif
 658 #endif
 659         pagpos = -1;
 660         /* don't free pagfname, need it below */
 661
 662         /* open the base file */
 663         basef = fopen(name, "r");
 664         if (basef == NULL) {
 665                 DEBUG(("dbminit: basefile open failed\n"));
 666                 basefname = enstring(name, "");
 667                 if (basefname == NULL) {
 668                         (void) fclose(pagf);
 669                         (void) fclose(dirf);
 670                         free(pagfname);
 671                         pagf = NULL;
 672                         return(-1);
 673                 }
 674         } else
 675                 basefname = NULL;
 676 #ifdef _IOFBF
 677         if (basef != NULL)
 678                 (void) setvbuf(basef, basebuf, _IOFBF, sizeof(basebuf));
 679 #endif
 680
 681         /* pick up configuration */
 682         if (getconf(dirf, pagf, &conf) < 0) {
 683                 DEBUG(("dbminit: getconf failure\n"));
 684                 (void) fclose(basef);
 685                 (void) fclose(pagf);
 686                 (void) fclose(dirf);
 687                 free(pagfname);
 688                 pagf = NULL;
 689                 errno = EDOM;   /* kind of a kludge, but very portable */
 690                 return(-1);
 691         }
 692         tagbits = conf.tagmask << conf.tagshift;
 693         taghere = conf.tagenb << conf.tagshift;
 694         tagboth = tagbits | taghere;
 695         mybytemap(mybmap);
 696         bytesame = 1;
 697         for (i = 0; i < SOF; i++)
 698                 if (mybmap[i] != conf.bytemap[i])
 699                         bytesame = 0;
 700
 701         /* get first table into core, if it looks desirable and feasible */
 702         s = (size_t)conf.tsize * SOF;
 703         if (incore && (off_t)(s/SOF) == conf.tsize) {
 704                 bufpagf = fopen(pagfname, (pagronly) ? "rb" : "r+b");
 705                 if (bufpagf != NULL)
 706                         corepag = getcore(bufpagf);
 707         } else {
 708                 bufpagf = NULL;
 709                 corepag = NULL;
 710         }
 711         free(pagfname);
 712
 713         /* misc. setup */
 714         crcinit();
 715         written = 0;
 716         prevp = FRESH;
 717         DEBUG(("dbminit: succeeded\n"));
 718         return(0);
 719 }
 720
 721 /*
 722  - enstring - concatenate two strings into a malloced area
 723  */
 724 static char *                   /* NULL if malloc fails */
 725 enstring(s1, s2)
 726 char *s1;
 727 char *s2;
 728 {
 729         register char *p;
 730
 731         p = malloc((size_t)strlen(s1) + (size_t)strlen(s2) + 1);
 732         if (p != NULL) {
 733                 (void) strcpy(p, s1);
 734                 (void) strcat(p, s2);
 735         } else {
 736                 DEBUG(("enstring(%s, %s) out of memory\n", s1, s2));
 737         }
 738         return(p);
 739 }
 740
 741 /*
 742  - dbmclose - close a database
 743  */
 744 int
 745 dbmclose()
 746 {
 747         register int ret = 0;
 748
 749         if (pagf == NULL) {
 750                 DEBUG(("dbmclose: not opened!\n"));
 751                 return(-1);
 752         }
 753
 754         if (fclose(pagf) == EOF) {
 755                 DEBUG(("dbmclose: fclose(pagf) failed\n"));
 756                 ret = -1;
 757         }
 758         pagf = basef;           /* ensure valid pointer; dbzsync checks it */
 759         if (dbzsync() < 0)
 760                 ret = -1;
 761         if (bufpagf != NULL && fclose(bufpagf) == EOF) {
 762                 DEBUG(("dbmclose: fclose(bufpagf) failed\n"));
 763                 ret = -1;
 764         }
 765         if (corepag != NULL)
 766                 free((char *)corepag);
 767         corepag = NULL;
 768         if (fclose(basef) == EOF) {
 769                 DEBUG(("dbmclose: fclose(basef) failed\n"));
 770                 ret = -1;
 771         }
 772         if (basefname != NULL)
 773                 free(basefname);
 774         basef = NULL;
 775         pagf = NULL;
 776         if (fclose(dirf) == EOF) {
 777                 DEBUG(("dbmclose: fclose(dirf) failed\n"));
 778                 ret = -1;
 779         }
 780
 781         DEBUG(("dbmclose: %s\n", (ret == 0) ? "succeeded" : "failed"));
 782         return(ret);
 783 }
 784
 785 /*
 786  - dbzsync - push all in-core data out to disk
 787  */
 788 int
 789 dbzsync()
 790 {
 791         register int ret = 0;
 792
 793         if (pagf == NULL) {
 794                 DEBUG(("dbzsync: not opened!\n"));
 795                 return(-1);
 796         }
 797         if (!written)
 798                 return(0);
 799
 800         if (corepag != NULL) {
 801                 if (putcore(corepag, bufpagf) < 0) {
 802                         DEBUG(("dbzsync: putcore failed\n"));
 803                         ret = -1;
 804                 }
 805         }
 806         if (!conf.olddbz)
 807                 if (putconf(dirf, &conf) < 0)
 808                         ret = -1;
 809
 810         DEBUG(("dbzsync: %s\n", (ret == 0) ? "succeeded" : "failed"));
 811         return(ret);
 812 }
 813
 814 /*
 815  - dbzcancel - cancel writing of in-core data
 816  * Mostly for use from child processes.
 817  * Note that we don't need to futz around with stdio buffers, because we
 818  * always fflush them immediately anyway and so they never have stale data.
 819  */
 820 int
 821 dbzcancel()
 822 {
 823         if (pagf == NULL) {
 824                 DEBUG(("dbzcancel: not opened!\n"));
 825                 return(-1);
 826         }
 827
 828         written = 0;
 829         return(0);
 830 }
 831
 832 /*
 833  - dbzfetch - fetch() with case mapping built in
 834  */
 835 datum
 836 dbzfetch(key)
 837 datum key;
 838 {
 839         char buffer[DBZMAXKEY + 1];
 840         datum mappedkey;
 841         register size_t keysize;
 842
 843         DEBUG(("dbzfetch: (%s)\n", key.dptr));
 844
 845         /* Key is supposed to be less than DBZMAXKEY */
 846         keysize = key.dsize;
 847         if (keysize >= DBZMAXKEY) {
 848                 keysize = DBZMAXKEY;
 849                 DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
 850         }
 851
 852         mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
 853         buffer[keysize] = '\0'; /* just a debug aid */
 854         mappedkey.dsize = keysize;
 855
 856         return(fetch(mappedkey));
 857 }
 858
 859 /*
 860  - fetch - get an entry from the database
 861  *
 862  * Disgusting fine point, in the name of backward compatibility:  if the
 863  * last character of "key" is a NUL, that character is (effectively) not
 864  * part of the comparison against the stored keys.
 865  */
 866 datum                           /* dptr NULL, dsize 0 means failure */
 867 fetch(key)
 868 datum key;
 869 {
 870         char buffer[DBZMAXKEY + 1];
 871         static off_t key_ptr;           /* return value points here */
 872         datum output;
 873         register size_t keysize;
 874         register size_t cmplen;
 875         register char *sepp;
 876
 877         DEBUG(("fetch: (%s)\n", key.dptr));
 878         output.dptr = NULL;
 879         output.dsize = 0;
 880         prevp = FRESH;
 881
 882         /* Key is supposed to be less than DBZMAXKEY */
 883         keysize = key.dsize;
 884         if (keysize >= DBZMAXKEY) {
 885                 keysize = DBZMAXKEY;
 886                 DEBUG(("keysize is %d - truncated to %d\n", key.dsize, DBZMAXKEY));
 887         }
 888
 889         if (pagf == NULL) {
 890                 DEBUG(("fetch: database not open!\n"));
 891                 return(output);
 892         } else if (basef == NULL) {     /* basef didn't exist yet */
 893                 basef = latebase();
 894                 if (basef == NULL)
 895                         return(output);
 896         }
 897
 898         cmplen = keysize;
 899         sepp = &conf.fieldsep;
 900         if (key.dptr[keysize-1] == '\0') {
 901                 cmplen--;
 902                 sepp = &buffer[keysize-1];
 903         }
 904         start(&srch, &key, FRESH);
 905         while ((key_ptr = search(&srch)) != NOTFOUND) {
 906                 DEBUG(("got 0x%lx\n", key_ptr));
 907
 908                 /* fetch the key */
 909                 if (fseek(basef, key_ptr, SEEK_SET) != 0) {
 910                         DEBUG(("fetch: seek failed\n"));
 911                         return(output);
 912                 }
 913                 if (fread(buffer, 1, keysize, basef) != keysize) {
 914                         DEBUG(("fetch: read failed\n"));
 915                         return(output);
 916                 }
 917
 918                 /* try it */
 919                 buffer[keysize] = '\0';         /* terminated for DEBUG */
 920                 (void) mapcase(buffer, buffer, keysize);
 921                 DEBUG(("fetch: buffer (%s) looking for (%s) size = %d\n",
 922                                                 buffer, key.dptr, keysize));
 923                 if (memcmp(key.dptr, buffer, cmplen) == 0 &&
 924                                 (*sepp == conf.fieldsep || *sepp == '\0')) {
 925                         /* we found it */
 926                         output.dptr = (char *)&key_ptr;
 927                         output.dsize = SOF;
 928                         DEBUG(("fetch: successful\n"));
 929                         return(output);
 930                 }
 931         }
 932
 933         /* we didn't find it */
 934         DEBUG(("fetch: failed\n"));
 935         prevp = &srch;                  /* remember where we stopped */
 936         return(output);
 937 }
 938
 939 /*
 940  - latebase - try to open a base file that wasn't there at the start
 941  */
 942 static FILE *
 943 latebase()
 944 {
 945         register FILE *it;
 946
 947         if (basefname == NULL) {
 948                 DEBUG(("latebase: name foulup\n"));
 949                 return(NULL);
 950         }
 951         it = fopen(basefname, "r");
 952         if (it == NULL) {
 953                 DEBUG(("latebase: still can't open base\n"));
 954         } else {
 955                 DEBUG(("latebase: late open succeeded\n"));
 956                 free(basefname);
 957                 basefname = NULL;
 958 #ifdef _IOFBF
 959                 (void) setvbuf(it, basebuf, _IOFBF, sizeof(basebuf));
 960 #endif
 961         }
 962         return(it);
 963 }
 964
 965 /*
 966  - dbzstore - store() with case mapping built in
 967  */
 968 int
 969 dbzstore(key, data)
 970 datum key;
 971 datum data;
 972 {
 973         char buffer[DBZMAXKEY + 1];
 974         datum mappedkey;
 975         register size_t keysize;
 976
 977         DEBUG(("dbzstore: (%s)\n", key.dptr));
 978
 979         /* Key is supposed to be less than DBZMAXKEY */
 980         keysize = key.dsize;
 981         if (keysize >= DBZMAXKEY) {
 982                 DEBUG(("dbzstore: key size too big (%d)\n", key.dsize));
 983                 return(-1);
 984         }
 985
 986         mappedkey.dptr = mapcase(buffer, key.dptr, keysize);
 987         buffer[keysize] = '\0'; /* just a debug aid */
 988         mappedkey.dsize = keysize;
 989
 990         return(store(mappedkey, data));
 991 }
 992
 993 /*
 994  - store - add an entry to the database
 995  */
 996 int                             /* 0 success, -1 failure */
 997 store(key, data)
 998 datum key;
 999 datum data;
1000 {
1001         off_t value;
1002
1003         if (pagf == NULL) {
1004                 DEBUG(("store: database not open!\n"));
1005                 return(-1);
1006         } else if (basef == NULL) {     /* basef didn't exist yet */
1007                 basef = latebase();
1008                 if (basef == NULL)
1009                         return(-1);
1010         }
1011         if (pagronly) {
1012                 DEBUG(("store: database open read-only\n"));
1013                 return(-1);
1014         }
1015         if (data.dsize != SOF) {
1016                 DEBUG(("store: value size wrong (%d)\n", data.dsize));
1017                 return(-1);
1018         }
1019         if (key.dsize >= DBZMAXKEY) {
1020                 DEBUG(("store: key size too big (%d)\n", key.dsize));
1021                 return(-1);
1022         }
1023
1024         /* copy the value in to ensure alignment */
1025         (void) memcpy((char *)&value, data.dptr, SOF);
1026         DEBUG(("store: (%s, %ld)\n", key.dptr, (long)value));
1027         if (!okayvalue(value)) {
1028                 DEBUG(("store: reserved bit or overflow in 0x%lx\n", value));
1029                 return(-1);
1030         }
1031
1032         /* find the place, exploiting previous search if possible */
1033         start(&srch, &key, prevp);
1034         while (search(&srch) != NOTFOUND)
1035                 continue;
1036
1037         prevp = FRESH;
1038         conf.used[0]++;
1039         DEBUG(("store: used count %ld\n", conf.used[0]));
1040         written = 1;
1041         return(set(&srch, value));
1042 }
1043
1044 /*
1045  - dbzincore - control attempts to keep .pag file in core
1046  */
1047 int                             /* old setting */
1048 dbzincore(value)
1049 int value;
1050 {
1051         register int old = incore;
1052
1053         incore = value;
1054         return(old);
1055 }
1056
1057 /*
1058  - getconf - get configuration from .dir file
1059  */
1060 static int                      /* 0 success, -1 failure */
1061 getconf(df, pf, cp)
1062 register FILE *df;              /* NULL means just give me the default */
1063 register FILE *pf;              /* NULL means don't care about .pag */
1064 register struct dbzconfig *cp;
1065 {
1066         register int c;
1067         register int i;
1068         int err = 0;
1069
1070         c = (df != NULL) ? getc(df) : EOF;
1071         if (c == EOF) {         /* empty file, no configuration known */
1072                 cp->olddbz = 0;
1073                 if (df != NULL && pf != NULL && getc(pf) != EOF)
1074                         cp->olddbz = 1;
1075                 cp->tsize = DEFSIZE;
1076                 cp->fieldsep = '\t';
1077                 for (i = 0; i < NUSEDS; i++)
1078                         cp->used[i] = 0;
1079                 cp->valuesize = SOF;
1080                 mybytemap(cp->bytemap);
1081                 cp->casemap = DEFCASE;
1082                 cp->tagenb = TAGENB;
1083                 cp->tagmask = TAGMASK;
1084                 cp->tagshift = TAGSHIFT;
1085                 DEBUG(("getconf: defaults (%ld, %c, (0x%lx/0x%lx<<%d))\n",
1086                         cp->tsize, cp->casemap, cp->tagenb,
1087                         cp->tagmask, cp->tagshift));
1088                 return(0);
1089         }
1090         (void) ungetc(c, df);
1091
1092         /* first line, the vital stuff */
1093         if (getc(df) != 'd' || getc(df) != 'b' || getc(df) != 'z')
1094                 err = -1;
1095         if (getno(df, &err) != dbzversion)
1096                 err = -1;
1097         cp->tsize = getno(df, &err);
1098         cp->fieldsep = getno(df, &err);
1099         while ((c = getc(df)) == ' ')
1100                 continue;
1101         cp->casemap = c;
1102         cp->tagenb = getno(df, &err);
1103         cp->tagmask = getno(df, &err);
1104         cp->tagshift = getno(df, &err);
1105         cp->valuesize = getno(df, &err);
1106         if (cp->valuesize != SOF) {
1107                 DEBUG(("getconf: wrong off_t size (%d)\n", cp->valuesize));
1108                 err = -1;
1109                 cp->valuesize = SOF;    /* to protect the loops below */
1110         }
1111         for (i = 0; i < cp->valuesize; i++)
1112                 cp->bytemap[i] = getno(df, &err);
1113         if (getc(df) != '\n')
1114                 err = -1;
1115         DEBUG(("size %ld, sep %d, cmap %c, tags 0x%lx/0x%lx<<%d, ", cp->tsize,
1116                         cp->fieldsep, cp->casemap, cp->tagenb, cp->tagmask,
1117                         cp->tagshift));
1118         DEBUG(("bytemap (%d)", cp->valuesize));
1119         for (i = 0; i < cp->valuesize; i++) {
1120                 DEBUG((" %d", cp->bytemap[i]));
1121         }
1122         DEBUG(("\n"));
1123
1124         /* second line, the usages */
1125         for (i = 0; i < NUSEDS; i++)
1126                 cp->used[i] = getno(df, &err);
1127         if (getc(df) != '\n')
1128                 err = -1;
1129         DEBUG(("used %ld %ld %ld...\n", cp->used[0], cp->used[1], cp->used[2]));
1130
1131         if (err < 0) {
1132                 DEBUG(("getconf error\n"));
1133                 return(-1);
1134         }
1135         return(0);
1136 }
1137
/*
 - getno - get a long
 *
 * Skips leading blanks, then collects characters up to the next blank,
 * newline or EOF (the terminating blank or newline is pushed back) and
 * converts them as a decimal number.  Problems are reported by setting
 * *ep to -1; *ep is never cleared here, so one flag can accumulate errors
 * over many calls.  Flagged problems are:  no number before newline/EOF,
 * EOF ending the token, characters other than digits and '-', a token
 * strtol() cannot consume completely (e.g. "-" or "1-2"), and a value
 * that does not fit in a long.  Tokens longer than MAXN-1 characters are
 * silently truncated before conversion.
 */
static long                     /* the number; 0 if none was present */
getno(f, ep)
FILE *f;
int *ep;
{
        register char *p;
#       define  MAXN    50
        char getbuf[MAXN];
        char *endp;
        register int c;
        long val;
        int saved_errno;

        while ((c = getc(f)) == ' ')
                continue;
        if (c == EOF || c == '\n') {
                DEBUG(("getno: missing number\n"));
                *ep = -1;
                return(0);
        }
        p = getbuf;
        *p++ = c;
        while ((c = getc(f)) != EOF && c != '\n' && c != ' ')
                if (p < &getbuf[MAXN-1])
                        *p++ = c;
        if (c == EOF) {
                DEBUG(("getno: EOF\n"));
                *ep = -1;
        } else
                (void) ungetc(c, f);
        *p = '\0';

        if (strspn(getbuf, "-1234567890") != strlen(getbuf)) {
                DEBUG(("getno: `%s' non-numeric\n", getbuf));
                *ep = -1;
        }

        /* strtol, unlike atol, lets us notice garbage and overflow */
        saved_errno = errno;
        errno = 0;
        val = strtol(getbuf, &endp, 10);
        if (endp == getbuf || *endp != '\0' || errno == ERANGE) {
                DEBUG(("getno: `%s' malformed or out of range\n", getbuf));
                *ep = -1;
        }
        errno = saved_errno;
        return(val);
}
1176
1177 /*
1178  - putconf - write configuration to .dir file
1179  */
1180 static int                      /* 0 success, -1 failure */
1181 putconf(f, cp)
1182 register FILE *f;
1183 register struct dbzconfig *cp;
1184 {
1185         register int i;
1186         register int ret = 0;
1187
1188         if (fseek(f, 0, SEEK_SET) != 0) {
1189                 DEBUG(("fseek failure in putconf\n"));
1190                 ret = -1;
1191         }
1192         fprintf(f, "dbz %d %ld %d %c %ld %ld %d %d", dbzversion,
1193                 (long)cp->tsize,
1194                 cp->fieldsep, cp->casemap, (long)cp->tagenb,
1195                 (long)cp->tagmask, cp->tagshift,
1196                 cp->valuesize);
1197
1198         for (i = 0; i < cp->valuesize; i++)
1199                 fprintf(f, " %d", cp->bytemap[i]);
1200         fprintf(f, "\n");
1201         for (i = 0; i < NUSEDS; i++)
1202                 fprintf(f, "%ld%c",
1203                         (long)cp->used[i], (i < NUSEDS-1) ? ' ' : '\n');
1204
1205
1206         (void) fflush(f);
1207         if (ferror(f))
1208                 ret = -1;
1209
1210         DEBUG(("putconf status %d\n", ret));
1211         return(ret);
1212 }
1213
1214 /*
1215  - getcore - try to set up an in-core copy of .pag file
1216  */
1217 static off_t *                  /* pointer to copy, or NULL */
1218 getcore(f)
1219 FILE *f;
1220 {
1221         register off_t *p;
1222         register size_t i;
1223         register size_t nread;
1224         register char *it;
1225
1226         it = malloc((size_t)conf.tsize * SOF);
1227         if (it == NULL) {
1228                 DEBUG(("getcore: malloc failed\n"));
1229                 return(NULL);
1230         }
1231
1232         nread = fread(it, SOF, (size_t)conf.tsize, f);
1233         if (ferror(f)) {
1234                 DEBUG(("getcore: read failed\n"));
1235                 free(it);
1236                 return(NULL);
1237         }
1238
1239         p = (off_t *)it + nread;
1240         i = (size_t)conf.tsize - nread;
1241         while (i-- > 0)
1242                 *p++ = VACANT;
1243         return((off_t *)it);
1244 }
1245
1246 /*
1247  - putcore - try to rewrite an in-core table
1248  */
1249 static int                      /* 0 okay, -1 fail */
1250 putcore(tab, f)
1251 off_t *tab;
1252 FILE *f;
1253 {
1254         if (fseek(f, 0, SEEK_SET) != 0) {
1255                 DEBUG(("fseek failure in putcore\n"));
1256                 return(-1);
1257         }
1258         (void) fwrite((char *)tab, SOF, (size_t)conf.tsize, f);
1259         (void) fflush(f);
1260         return((ferror(f)) ? -1 : 0);
1261 }
1262
1263 /*
1264  - start - set up to start or restart a search
1265  */
1266 static void
1267 start(sp, kp, osp)
1268 register struct searcher *sp;
1269 register datum *kp;
1270 register struct searcher *osp;          /* may be FRESH, i.e. NULL */
1271 {
1272         register long h;
1273
1274         h = hash(kp->dptr, kp->dsize);
1275         if (osp != FRESH && osp->hash == h) {
1276                 if (sp != osp)
1277                         *sp = *osp;
1278                 DEBUG(("search restarted\n"));
1279         } else {
1280                 sp->hash = h;
1281                 sp->tag = MKTAG(h / conf.tsize);
1282                 DEBUG(("tag 0x%lx\n", sp->tag));
1283                 sp->place = h % conf.tsize;
1284                 sp->tabno = 0;
1285                 sp->run = (conf.olddbz) ? conf.tsize : MAXRUN;
1286                 sp->aborted = 0;
1287         }
1288         sp->seen = 0;
1289 }
1290
1291 /*
1292  - search - conduct part of a search
1293  */
1294 static off_t                    /* NOTFOUND if we hit VACANT or error */
1295 search(sp)
1296 register struct searcher *sp;
1297 {
1298         register off_t dest;
1299         register off_t value;
1300         off_t val;              /* buffer for value (can't fread register) */
1301         register off_t place;
1302
1303         if (sp->aborted)
1304                 return(NOTFOUND);
1305
1306         for (;;) {
1307                 /* determine location to be examined */
1308                 place = sp->place;
1309                 if (sp->seen) {
1310                         /* go to next location */
1311                         if (--sp->run <= 0) {
1312                                 sp->tabno++;
1313                                 sp->run = MAXRUN;
1314                         }
1315                         place = (place+1)%conf.tsize + sp->tabno*conf.tsize;
1316                         sp->place = place;
1317                 } else
1318                         sp->seen = 1;   /* now looking at current location */
1319                 DEBUG(("search @ %ld\n", place));
1320
1321                 /* get the tagged value */
1322                 if (corepag != NULL && place < conf.tsize) {
1323                         DEBUG(("search: in core\n"));
1324                         value = MAPIN(corepag[place]);
1325                 } else {
1326                         /* seek, if necessary */
1327                         dest = place * SOF;
1328                         if (pagpos != dest) {
1329                                 if (fseek(pagf, dest, SEEK_SET) != 0) {
1330                                         DEBUG(("search: seek failed\n"));
1331                                         pagpos = -1;
1332                                         sp->aborted = 1;
1333                                         return(NOTFOUND);
1334                                 }
1335                                 pagpos = dest;
1336                         }
1337
1338                         /* read it */
1339                         if (fread((char *)&val, sizeof(val), 1, pagf) == 1)
1340                                 value = MAPIN(val);
1341                         else if (ferror(pagf)) {
1342                                 DEBUG(("search: read failed\n"));
1343                                 pagpos = -1;
1344                                 sp->aborted = 1;
1345                                 return(NOTFOUND);
1346                         } else
1347                                 value = VACANT;
1348
1349                         /* and finish up */
1350                         pagpos += sizeof(val);
1351                 }
1352
1353                 /* vacant slot is always cause to return */
1354                 if (value == VACANT) {
1355                         DEBUG(("search: empty slot\n"));
1356                         return(NOTFOUND);
1357                 };
1358
1359                 /* check the tag */
1360                 value = UNBIAS(value);
1361                 DEBUG(("got 0x%lx\n", value));
1362                 if (!HASTAG(value)) {
1363                         DEBUG(("tagless\n"));
1364                         return(value);
1365                 } else if (TAG(value) == sp->tag) {
1366                         DEBUG(("match\n"));
1367                         return(NOTAG(value));
1368                 } else {
1369                         DEBUG(("mismatch 0x%lx\n", TAG(value)));
1370                 }
1371         }
1372         /* NOTREACHED */
1373 }
1374
/*
 - okayvalue - check that a value can be stored
 *
 * A value is refused if HASTAG() is true of it, or (when OVERFLOW is
 * defined) if it equals LONG_MAX, since BIAS() and UNBIAS() would then
 * overflow.
 */
static int                      /* predicate */
okayvalue(value)
off_t value;
{
        int ok = !HASTAG(value);

#ifdef OVERFLOW
        if (value == LONG_MAX)  /* BIAS() and UNBIAS() will overflow */
                ok = 0;
#endif
        return(ok);
}
1390
1391 /*
1392  - set - store a value into a location previously found by search
1393  */
1394 static int                      /* 0 success, -1 failure */
1395 set(sp, value)
1396 register struct searcher *sp;
1397 off_t value;
1398 {
1399         register off_t place = sp->place;
1400         register off_t v = value;
1401
1402         if (sp->aborted)
1403                 return(-1);
1404
1405         if (CANTAG(v) && !conf.olddbz) {
1406                 v |= sp->tag | taghere;
1407                 if (v != UNBIAS(VACANT))        /* BIAS(v) won't look VACANT */
1408 #ifdef OVERFLOW
1409                         if (v != LONG_MAX)      /* and it won't overflow */
1410 #endif
1411                         value = v;
1412         }
1413         DEBUG(("tagged value is 0x%lx\n", value));
1414         value = BIAS(value);
1415         value = MAPOUT(value);
1416
1417         /* If we have the index file in memory, use it */
1418         if (corepag != NULL && place < conf.tsize) {
1419                 corepag[place] = value;
1420                 DEBUG(("set: incore\n"));
1421                 return(0);
1422         }
1423
1424         /* seek to spot */
1425         pagpos = -1;            /* invalidate position memory */
1426         if (fseek(pagf, place * SOF, SEEK_SET) != 0) {
1427                 DEBUG(("set: seek failed\n"));
1428                 sp->aborted = 1;
1429                 return(-1);
1430         }
1431
1432         /* write in data */
1433         if (fwrite((char *)&value, SOF, 1, pagf) != 1) {
1434                 DEBUG(("set: write failed\n"));
1435                 sp->aborted = 1;
1436                 return(-1);
1437         }
1438         /* fflush improves robustness, and buffer re-use is rare anyway */
1439         if (fflush(pagf) == EOF) {
1440                 DEBUG(("set: fflush failed\n"));
1441                 sp->aborted = 1;
1442                 return(-1);
1443         }
1444
1445         DEBUG(("set: succeeded\n"));
1446         return(0);
1447 }
1448
1449 /*
1450  - mybytemap - determine this machine's byte map
1451  *
1452  * A byte map is an array of ints, sizeof(off_t) of them.  The 0th int
1453  * is the byte number of the high-order byte in my off_t, and so forth.
1454  */
1455 static void
1456 mybytemap(map)
1457 int map[];                      /* -> int[SOF] */
1458 {
1459         union {
1460                 off_t o;
1461                 char c[SOF];
1462         } u;
1463         register int *mp = &map[SOF];
1464         register int ntodo;
1465         register int i;
1466
1467         u.o = 1;
1468         for (ntodo = (int)SOF; ntodo > 0; ntodo--) {
1469                 for (i = 0; i < SOF; i++)
1470                         if (u.c[i] != 0)
1471                                 break;
1472                 if (i == SOF) {
1473                         /* trouble -- set it to *something* consistent */
1474                         DEBUG(("mybytemap: nonexistent byte %d!!!\n", ntodo));
1475                         for (i = 0; i < SOF; i++)
1476                                 map[i] = i;
1477                         return;
1478                 }
1479                 DEBUG(("mybytemap: byte %d\n", i));
1480                 *--mp = i;
1481                 while (u.c[i] != 0)
1482                         u.o <<= 1;
1483         }
1484 }
1485
1486 /*
1487  - bytemap - transform an off_t from byte ordering map1 to map2
1488  */
1489 static off_t                    /* transformed result */
1490 bytemap(ino, map1, map2)
1491 off_t ino;
1492 int *map1;
1493 int *map2;
1494 {
1495         union oc {
1496                 off_t o;
1497                 char c[SOF];
1498         };
1499         union oc in;
1500         union oc out;
1501         register int i;
1502
1503         in.o = ino;
1504         for (i = 0; i < SOF; i++)
1505                 out.c[map2[i]] = in.c[map1[i]];
1506         return(out.o);
1507 }
1508
1509 /*
1510  * This is a simplified version of the pathalias hashing function.
 * Thanks to Steve Bellovin and Peter Honeyman
1512  *
1513  * hash a string into a long int.  31 bit crc (from andrew appel).
1514  * the crc table is computed at run time by crcinit() -- we could
1515  * precompute, but it takes 1 clock tick on a 750.
1516  *
1517  * This fast table calculation works only if POLY is a prime polynomial
1518  * in the field of integers modulo 2.  Since the coefficients of a
1519  * 32-bit polynomial won't fit in a 32-bit word, the high-order bit is
1520  * implicit.  IT MUST ALSO BE THE CASE that the coefficients of orders
1521  * 31 down to 25 are zero.  Happily, we have candidates, from
1522  * E. J.  Watson, "Primitive Polynomials (Mod 2)", Math. Comp. 16 (1962):
1523  *      x^32 + x^7 + x^5 + x^3 + x^2 + x^1 + x^0
1524  *      x^31 + x^3 + x^0
1525  *
1526  * We reverse the bits to get:
1527  *      111101010000000000000000000000001 but drop the last 1
1528  *         f   5   0   0   0   0   0   0
1529  *      010010000000000000000000000000001 ditto, for 31-bit crc
1530  *         4   8   0   0   0   0   0   0
1531  */
1532
#define POLY 0x48000000L        /* 31-bit polynomial (avoids sign problems) */

static long CrcTable[128];      /* indexed by 7 bits at a time; see crcinit() */

/*
 - crcinit - initialize tables for hash function
 *
 * Entry i of CrcTable is the XOR of POLY shifted right by j, taken over
 * every bit j (0 through 6) that is set in i.
 */
static void
crcinit()
{
        int idx;
        int bit;
        long acc;

        for (idx = 0; idx < 128; idx++) {
                acc = 0L;
                for (bit = 0; bit < 7; bit++) {
                        if ((idx >> bit) & 1)
                                acc ^= POLY >> bit;
                }
                CrcTable[idx] = acc;
        }
        DEBUG(("crcinit: done\n"));
}
1555
1556 /*
1557  - hash - Honeyman's nice hashing function
1558  */
1559 static long
1560 hash(name, size)
1561 register char *name;
1562 register int size;
1563 {
1564         register long sum = 0L;
1565
1566         while (size--) {
1567                 sum = (sum >> 7) ^ CrcTable[(sum ^ (*name++)) & 0x7f];
1568         }
1569         DEBUG(("hash: returns (%ld)\n", sum));
1570         return(sum);
1571 }
1572
1573 /*
1574  * case-mapping stuff
1575  *
1576  * Borrowed from C News, by permission of the authors.  Somewhat modified.
1577  *
1578  * We exploit the fact that we are dealing only with headers here, and
1579  * headers are limited to the ASCII characters by RFC822.  It is barely
1580  * possible that we might be dealing with a translation into another
1581  * character set, but in particular it's very unlikely for a header
1582  * character to be outside -128..255.
1583  *
1584  * Life would be a whole lot simpler if tolower() could safely and portably
1585  * be applied to any char.
1586  */
1587
#define OFFSET  128             /* avoid trouble with negative chars */

/* must call casencmp before invoking TOLOW... */
#define TOLOW(c)        (cmap[(c)+OFFSET])

/* ...but the use of it in CISTREQN is safe without the preliminary call (!) */
/* CISTREQN is an optimised case-insensitive strncmp(a,b,n)==0; n > 0 */
#define CISTREQN(a, b, n) \
        (TOLOW((a)[0]) == TOLOW((b)[0]) && casencmp(a, b, n) == 0)

#define MAPSIZE (256+OFFSET)
static char cmap[MAPSIZE];      /* relies on init to '\0' */
static int mprimed = 0;         /* has cmap been set up? */

/*
 - mapprime - set up case-mapping stuff
 *
 * Fills cmap so that TOLOW() of a letter of either case yields the lower
 * case letter and TOLOW() of anything else yields the character itself.
 * The letters are entered first; every slot still holding '\0' is then
 * given its own character value.
 */
static void
mapprime()
{
        static char lower[] = "abcdefghijklmnopqrstuvwxyz";
        static char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
        int k;
        int slot;

        for (k = 0; lower[k] != '\0'; k++) {
                cmap[lower[k]+OFFSET] = lower[k];
                cmap[upper[k]+OFFSET] = lower[k];
        }

        for (slot = 0; slot < MAPSIZE; slot++) {
                if (cmap[slot] == '\0')
                        cmap[slot] = (char)(slot-OFFSET);
        }

        mprimed = 1;
}
1625
1626 /*
1627  - casencmp - case-independent strncmp
1628  */
1629 static int                      /* < == > 0 */
1630 casencmp(s1, s2, len)
1631 char *s1;
1632 char *s2;
1633 int len;
1634 {
1635         register char *p1;
1636         register char *p2;
1637         register int n;
1638
1639         if (!mprimed)
1640                 mapprime();
1641
1642         p1 = s1;
1643         p2 = s2;
1644         n = len;
1645         while (--n >= 0 && *p1 != '\0' && TOLOW(*p1) == TOLOW(*p2)) {
1646                 p1++;
1647                 p2++;
1648         }
1649         if (n < 0)
1650                 return(0);
1651
1652         /*
1653          * The following case analysis is necessary so that characters
1654          * which look negative collate low against normal characters but
1655          * high against the end-of-string NUL.
1656          */
1657         if (*p1 == '\0' && *p2 == '\0')
1658                 return(0);
1659         else if (*p1 == '\0')
1660                 return(-1);
1661         else if (*p2 == '\0')
1662                 return(1);
1663         else
1664                 return(TOLOW(*p1) - TOLOW(*p2));
1665 }
1666
1667 /*
1668  - mapcase - do case-mapped copy
1669  */
1670 static char *                   /* returns src or dst */
1671 mapcase(dst, src, siz)
1672 char *dst;                      /* destination, used only if mapping needed */
1673 char *src;                      /* source; src == dst is legal */
1674 size_t siz;
1675 {
1676         register char *s;
1677         register char *d;
1678         register char *c;       /* case break */
1679         register char *e;       /* end of source */
1680
1681
1682         c = cipoint(src, siz);
1683         if (c == NULL)
1684                 return(src);
1685
1686         if (!mprimed)
1687                 mapprime();
1688         s = src;
1689         e = s + siz;
1690         d = dst;
1691
1692         while (s < c)
1693                 *d++ = *s++;
1694         while (s < e)
1695                 *d++ = TOLOW(*s++);
1696
1697         return(dst);
1698 }
1699
1700 /*
1701  - cipoint - where in this message-ID does it become case-insensitive?
1702  *
1703  * The RFC822 code is not quite complete.  Absolute, total, full RFC822
1704  * compliance requires a horrible parsing job, because of the arcane
1705  * quoting conventions -- abc"def"ghi is not equivalent to abc"DEF"ghi,
1706  * for example.  There are three or four things that might occur in the
1707  * domain part of a message-id that are case-sensitive.  They don't seem
1708  * to ever occur in real news, thank Cthulhu.  (What?  You were expecting
1709  * a merciful and forgiving deity to be invoked in connection with RFC822?
1710  * Forget it; none of them would come near it.)
1711  */
1712 static char *                   /* pointer into s, or NULL for "nowhere" */
1713 cipoint(s, siz)
1714 char *s;
1715 size_t siz;
1716 {
1717         register char *p;
1718         static char post[] = "postmaster";
1719         static int plen = sizeof(post)-1;
1720
1721         switch (conf.casemap) {
1722         case '0':               /* unmapped, sensible */
1723                 return(NULL);
1724                 break;
1725         case 'C':               /* C News, RFC 822 conformant (approx.) */
1726                 p = memchr(s, '@', siz);
1727                 if (p == NULL)                  /* no local/domain split */
1728                         return(NULL);           /* assume all local */
1729                 else if (p - (s+1) == plen && CISTREQN(s+1, post, plen)) {
1730                         /* crazy -- "postmaster" is case-insensitive */
1731                         return(s);
1732                 } else
1733                         return(p);
1734                 break;
1735         case '=':               /* 2.11, neither sensible nor conformant */
1736                 return(s);      /* all case-insensitive */
1737                 break;
1738         }
1739
1740         DEBUG(("cipoint: unknown case mapping `%c'\n", conf.casemap));
1741         return(NULL);           /* just leave it alone */
1742 }
1743
/*
 - dbzdebug - control dbz debugging at run time
 *
 * When the library was built without DBZDEBUG there is nothing to
 * control, and -1 is returned whatever the argument.
 */
int                             /* old value */
dbzdebug(value)
int value;
{
#ifndef DBZDEBUG
        return(-1);
#else
        int previous;

        previous = debug;
        debug = value;
        return(previous);
#endif
}