hash-lookup.c

   1 #include "git-compat-util.h"
   2 #include "hash.h"
   3 #include "hash-lookup.h"
   4 #include "read-cache-ll.h"
   5
   6 static uint32_t take2(const struct object_id *oid, size_t ofs)
   7 {
   8         return ((oid->hash[ofs] << 8) | oid->hash[ofs + 1]);
   9 }
  10
  11 /*
  12  * Conventional binary search loop looks like this:
  13  *
  14  *      do {
  15  *              int mi = lo + (hi - lo) / 2;
  16  *              int cmp = "entry pointed at by mi" minus "target";
  17  *              if (!cmp)
  18  *                      return (mi is the wanted one)
  19  *              if (cmp > 0)
  20  *                      hi = mi; "mi is larger than target"
  21  *              else
  22  *                      lo = mi+1; "mi is smaller than target"
  23  *      } while (lo < hi);
  24  *
  25  * The invariants are:
  26  *
  27  * - When entering the loop, lo points at a slot that is never
  28  *   above the target (it could be at the target), hi points at a
  29  *   slot that is guaranteed to be above the target (it can never
  30  *   be at the target).
  31  *
  32  * - We find a point 'mi' between lo and hi (mi could be the same
  33  *   as lo, but never can be the same as hi), and check if it hits
  34  *   the target.  There are three cases:
  35  *
  36  *    - if it is a hit, we are happy.
  37  *
  38  *    - if it is strictly higher than the target, we update hi with
  39  *      it.
  40  *
  41  *    - if it is strictly lower than the target, we update lo to be
  42  *      one slot after it, because we allow lo to be at the target.
  43  *
  44  * When choosing 'mi', we do not have to take the "middle" but
  45  * anywhere in between lo and hi, as long as lo <= mi < hi is
  46  * satisfied.  When we somehow know that the distance between the
  47  * target and lo is much shorter than the target and hi, we could
  48  * pick mi that is much closer to lo than the midway.
  49  */
  50 /*
  51  * The table should contain "nr" elements.
  52  * The oid of element i (between 0 and nr - 1) should be returned
  53  * by "fn(i, table)".
  54  */
  55 int oid_pos(const struct object_id *oid, const void *table, size_t nr,
  56             oid_access_fn fn)
  57 {
  58         size_t hi = nr;
  59         size_t lo = 0;
  60         size_t mi = 0;
  61
  62         if (!nr)
  63                 return -1;
  64
  65         if (nr != 1) {
  66                 size_t lov, hiv, miv, ofs;
  67
  68                 for (ofs = 0; ofs < the_hash_algo->rawsz - 2; ofs += 2) {
  69                         lov = take2(fn(0, table), ofs);
  70                         hiv = take2(fn(nr - 1, table), ofs);
  71                         miv = take2(oid, ofs);
  72                         if (miv < lov)
  73                                 return -1;
  74                         if (hiv < miv)
  75                                 return index_pos_to_insert_pos(nr);
  76                         if (lov != hiv) {
  77                                 /*
  78                                  * At this point miv could be equal
  79                                  * to hiv (but hash could still be higher);
  80                                  * the invariant of (mi < hi) should be
  81                                  * kept.
  82                                  */
  83                                 mi = (nr - 1) * (miv - lov) / (hiv - lov);
  84                                 if (lo <= mi && mi < hi)
  85                                         break;
  86                                 BUG("assertion failed in binary search");
  87                         }
  88                 }
  89         }
  90
  91         do {
  92                 int cmp;
  93                 cmp = oidcmp(fn(mi, table), oid);
  94                 if (!cmp)
  95                         return mi;
  96                 if (cmp > 0)
  97                         hi = mi;
  98                 else
  99                         lo = mi + 1;
 100                 mi = lo + (hi - lo) / 2;
 101         } while (lo < hi);
 102         return index_pos_to_insert_pos(lo);
 103 }
 104
 105 int bsearch_hash(const unsigned char *hash, const uint32_t *fanout_nbo,
 106                  const unsigned char *table, size_t stride, uint32_t *result)
 107 {
 108         uint32_t hi, lo;
 109
 110         hi = ntohl(fanout_nbo[*hash]);
 111         lo = ((*hash == 0x0) ? 0 : ntohl(fanout_nbo[*hash - 1]));
 112
 113         while (lo < hi) {
 114                 unsigned mi = lo + (hi - lo) / 2;
 115                 int cmp = hashcmp(table + mi * stride, hash);
 116
 117                 if (!cmp) {
 118                         if (result)
 119                                 *result = mi;
 120                         return 1;
 121                 }
 122                 if (cmp > 0)
 123                         hi = mi;
 124                 else
 125                         lo = mi + 1;
 126         }
 127
 128         if (result)
 129                 *result = lo;
 130         return 0;
 131 }