helgrind/hg_wordset.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- Sets of words, with unique set identifiers.                  ---*/
   4 /*---                                                 hg_wordset.c ---*/
   5 /*--------------------------------------------------------------------*/
   6
   7 /*
   8    This file is part of Helgrind, a Valgrind tool for detecting errors
   9    in threaded programs.
  10
  11    Copyright (C) 2007-2017 OpenWorks LLP
  12        info@open-works.co.uk
  13
  14    This program is free software; you can redistribute it and/or
  15    modify it under the terms of the GNU General Public License as
  16    published by the Free Software Foundation; either version 2 of the
  17    License, or (at your option) any later version.
  18
  19    This program is distributed in the hope that it will be useful, but
  20    WITHOUT ANY WARRANTY; without even the implied warranty of
  21    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  22    General Public License for more details.
  23
  24    You should have received a copy of the GNU General Public License
  25    along with this program; if not, see <http://www.gnu.org/licenses/>.
  26
  27    The GNU General Public License is contained in the file COPYING.
  28
  29    Neither the names of the U.S. Department of Energy nor the
  30    University of California nor the names of its contributors may be
  31    used to endorse or promote products derived from this software
  32    without prior written permission.
  33 */
  34
  35 #include "pub_tool_basics.h"
  36 #include "pub_tool_libcassert.h"
  37 #include "pub_tool_libcbase.h"
  38 #include "pub_tool_libcprint.h"
  39 #include "pub_tool_threadstate.h"
  40 #include "pub_tool_wordfm.h"
  41
  42 #include "hg_basics.h"
  43 #include "hg_wordset.h"     /* self */
  44
  45 // define to 1 to have (a lot of) debugging of add/re-use/die WSU entries.
  46 #define HG_DEBUG 0
  47
  48 //------------------------------------------------------------------//
  49 //--- Word Cache                                                 ---//
  50 //------------------------------------------------------------------//
  51
  52 typedef
  53    struct { UWord arg1; UWord arg2; UWord res; }
  54    WCacheEnt;
  55
  56 /* Each cache is a fixed sized array of N_WCACHE_STAT_MAX entries.
  57    However only the first .dynMax are used.  This is because at some
  58    point, expanding the cache further overall gives a slowdown because
  59    searching more entries more than negates any performance advantage
  60    from caching those entries in the first place.  Hence use .dynMax
  61    to allow the size of the cache(s) to be set differently for each
  62    different WordSetU. */
  63 #define N_WCACHE_STAT_MAX 32
  64 typedef
  65    struct {
  66       WCacheEnt ent[N_WCACHE_STAT_MAX];
  67       UWord     dynMax; /* 1 .. N_WCACHE_STAT_MAX inclusive */
  68       UWord     inUse;  /* 0 .. dynMax inclusive */
  69    }
  70    WCache;
  71
  72 #define WCache_INIT(_zzcache,_zzdynmax)                              \
  73    do {                                                              \
  74       tl_assert((_zzdynmax) >= 1);                                   \
  75       tl_assert((_zzdynmax) <= N_WCACHE_STAT_MAX);                   \
  76       (_zzcache).dynMax = (_zzdynmax);                               \
  77       (_zzcache).inUse = 0;                                          \
  78    } while (0)
  79
  80 #define WCache_LOOKUP_AND_RETURN(_retty,_zzcache,_zzarg1,_zzarg2)    \
  81    do {                                                              \
  82       UWord   _i;                                                    \
  83       UWord   _arg1  = (UWord)(_zzarg1);                             \
  84       UWord   _arg2  = (UWord)(_zzarg2);                             \
  85       WCache* _cache = &(_zzcache);                                  \
  86       tl_assert(_cache->dynMax >= 1);                                \
  87       tl_assert(_cache->dynMax <= N_WCACHE_STAT_MAX);                \
  88       tl_assert(_cache->inUse >= 0);                                 \
  89       tl_assert(_cache->inUse <= _cache->dynMax);                    \
  90       if (_cache->inUse > 0) {                                       \
  91          if (_cache->ent[0].arg1 == _arg1                            \
  92              && _cache->ent[0].arg2 == _arg2)                        \
  93             return (_retty)_cache->ent[0].res;                       \
  94          for (_i = 1; _i < _cache->inUse; _i++) {                    \
  95             if (_cache->ent[_i].arg1 == _arg1                        \
  96                 && _cache->ent[_i].arg2 == _arg2) {                  \
  97                WCacheEnt tmp     = _cache->ent[_i-1];                \
  98                _cache->ent[_i-1] = _cache->ent[_i];                  \
  99                _cache->ent[_i]   = tmp;                              \
 100                return (_retty)_cache->ent[_i-1].res;                 \
 101             }                                                        \
 102          }                                                           \
 103       }                                                              \
 104    } while (0)
 105
 106 #define WCache_UPDATE(_zzcache,_zzarg1,_zzarg2,_zzresult)            \
 107    do {                                                              \
 108       Word    _i;                                                    \
 109       UWord   _arg1  = (UWord)(_zzarg1);                             \
 110       UWord   _arg2  = (UWord)(_zzarg2);                             \
 111       UWord   _res   = (UWord)(_zzresult);                           \
 112       WCache* _cache = &(_zzcache);                                  \
 113       tl_assert(_cache->dynMax >= 1);                                \
 114       tl_assert(_cache->dynMax <= N_WCACHE_STAT_MAX);                \
 115       tl_assert(_cache->inUse >= 0);                                 \
 116       tl_assert(_cache->inUse <= _cache->dynMax);                    \
 117       if (_cache->inUse < _cache->dynMax)                            \
 118          _cache->inUse++;                                            \
 119       for (_i = _cache->inUse-1; _i >= 1; _i--)                      \
 120          _cache->ent[_i] = _cache->ent[_i-1];                        \
 121       _cache->ent[0].arg1 = _arg1;                                   \
 122       _cache->ent[0].arg2 = _arg2;                                   \
 123       _cache->ent[0].res  = _res;                                    \
 124    } while (0)
 125
 126
 127 //------------------------------------------------------------------//
 128 //---                          WordSet                           ---//
 129 //---                       Implementation                       ---//
 130 //------------------------------------------------------------------//
 131
 132 typedef
 133    struct {
 134       WordSetU* owner; /* for sanity checking */
 135       UWord*    words;
 136       UWord     size; /* Really this should be SizeT */
 137    }
 138    WordVec;
 139
 140 /* ix2vec[0 .. ix2vec_used-1] are pointers to the lock sets (WordVecs)
 141    really.  vec2ix is the inverse mapping, mapping WordVec* to the
 142    corresponding ix2vec entry number.  The two mappings are mutually
 143    redundant.
 144
 145    If a WordVec WV is marked as dead by HG(dieWS), WV is removed from
 146    vec2ix. The entry of the dead WVs in ix2vec are used to maintain a
 147    linked list of free (to be re-used) ix2vec entries. */
 148 struct _WordSetU {
 149       void*     (*alloc)(const HChar*,SizeT);
 150       const HChar* cc;
 151       void      (*dealloc)(void*);
 152       WordFM*   vec2ix; /* WordVec-to-WordSet mapping tree */
 153       WordVec** ix2vec; /* WordSet-to-WordVec mapping array */
 154       UWord     ix2vec_size;
 155       UWord     ix2vec_used;
 156       WordVec** ix2vec_free;
 157       WordSet   empty; /* cached, for speed */
 158       /* Caches for some operations */
 159       WCache    cache_addTo;
 160       WCache    cache_delFrom;
 161       WCache    cache_intersect;
 162       WCache    cache_minus;
 163       /* Stats */
 164       UWord     n_add;
 165       UWord     n_add_uncached;
 166       UWord     n_del;
 167       UWord     n_del_uncached;
 168       UWord     n_die;
 169       UWord     n_union;
 170       UWord     n_intersect;
 171       UWord     n_intersect_uncached;
 172       UWord     n_minus;
 173       UWord     n_minus_uncached;
 174       UWord     n_elem;
 175       UWord     n_doubleton;
 176       UWord     n_isEmpty;
 177       UWord     n_isSingleton;
 178       UWord     n_anyElementOf;
 179       UWord     n_isSubsetOf;
 180    };
 181
 182 /* Create a new WordVec of the given size. */
 183
 184 static WordVec* new_WV_of_size ( WordSetU* wsu, UWord sz )
 185 {
 186    WordVec* wv;
 187    tl_assert(sz >= 0);
 188    wv = wsu->alloc( wsu->cc, sizeof(WordVec) );
 189    wv->owner = wsu;
 190    wv->words = NULL;
 191    wv->size = sz;
 192    if (sz > 0) {
 193      wv->words = wsu->alloc( wsu->cc, (SizeT)sz * sizeof(UWord) );
 194    }
 195    return wv;
 196 }
 197
 198 static void delete_WV ( WordVec* wv )
 199 {
 200    void (*dealloc)(void*) = wv->owner->dealloc;
 201    if (wv->words) {
 202       dealloc(wv->words);
 203    }
 204    dealloc(wv);
 205 }
 206 static void delete_WV_for_FM ( UWord wv ) {
 207    delete_WV( (WordVec*)wv );
 208 }
 209
 210 static Word cmp_WordVecs_for_FM ( UWord wv1W, UWord wv2W )
 211 {
 212    UWord    i;
 213    WordVec* wv1    = (WordVec*)wv1W;
 214    WordVec* wv2    = (WordVec*)wv2W;
 215
 216    // WordVecs with smaller size are smaller.
 217    if (wv1->size < wv2->size) {
 218       return -1;
 219    }
 220    if (wv1->size > wv2->size) {
 221       return 1;
 222    }
 223
 224    // Sizes are equal => order based on content.
 225    for (i = 0; i < wv1->size; i++) {
 226       if (wv1->words[i] == wv2->words[i])
 227          continue;
 228       if (wv1->words[i] < wv2->words[i])
 229          return -1;
 230       if (wv1->words[i] > wv2->words[i])
 231          return 1;
 232       tl_assert(0);
 233    }
 234    return 0; /* identical */
 235 }
 236
 237 static void ensure_ix2vec_space ( WordSetU* wsu )
 238 {
 239    UInt      i, new_sz;
 240    WordVec** new_vec;
 241    tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
 242    if (wsu->ix2vec_used < wsu->ix2vec_size)
 243       return;
 244    new_sz = 2 * wsu->ix2vec_size;
 245    if (new_sz == 0) new_sz = 1;
 246    new_vec = wsu->alloc( wsu->cc, new_sz * sizeof(WordVec*) );
 247    tl_assert(new_vec);
 248    for (i = 0; i < wsu->ix2vec_size; i++)
 249       new_vec[i] = wsu->ix2vec[i];
 250    if (wsu->ix2vec)
 251       wsu->dealloc(wsu->ix2vec);
 252    wsu->ix2vec = new_vec;
 253    wsu->ix2vec_size = new_sz;
 254 }
 255
 256 /* True if wv is a dead entry (i.e. is in the linked list of free to be re-used
 257    entries in ix2vec). */
 258 static inline Bool is_dead ( WordSetU* wsu, WordVec* wv )
 259 {
 260    if (wv == NULL) /* last element in free linked list in ix2vec */
 261       return True;
 262    else
 263       return (WordVec**)wv >= &(wsu->ix2vec[1])
 264          &&  (WordVec**)wv < &(wsu->ix2vec[wsu->ix2vec_size]);
 265 }
 266 /* Index into a WordSetU, doing the obvious range check.  Failure of
 267    the assertions marked XXX and YYY is an indication of passing the
 268    wrong WordSetU* in the public API of this module.
 269    Accessing a dead ws will assert. */
 270 static WordVec* do_ix2vec ( WordSetU* wsu, WordSet ws )
 271 {
 272    WordVec* wv;
 273    tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
 274    if (wsu->ix2vec_used > 0)
 275       tl_assert(wsu->ix2vec);
 276    /* If this assertion fails, it may mean you supplied a 'ws'
 277       that does not come from the 'wsu' universe. */
 278    tl_assert(ws < wsu->ix2vec_used); /* XXX */
 279    wv = wsu->ix2vec[ws];
 280    /* Make absolutely sure that 'ws' is a non dead member of 'wsu'. */
 281    tl_assert(wv);
 282    tl_assert(!is_dead(wsu,wv));
 283    tl_assert(wv->owner == wsu); /* YYY */
 284    return wv;
 285 }
 286
 287 /* Same as do_ix2vec but returns NULL for a dead ws. */
 288 static WordVec* do_ix2vec_with_dead ( WordSetU* wsu, WordSet ws )
 289 {
 290    WordVec* wv;
 291    tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
 292    if (wsu->ix2vec_used > 0)
 293       tl_assert(wsu->ix2vec);
 294    /* If this assertion fails, it may mean you supplied a 'ws'
 295       that does not come from the 'wsu' universe. */
 296    tl_assert(ws < wsu->ix2vec_used); /* XXX */
 297    wv = wsu->ix2vec[ws];
 298    /* Make absolutely sure that 'ws' is either dead or a member of 'wsu'. */
 299    if (is_dead(wsu,wv))
 300       wv = NULL;
 301    else
 302       tl_assert(wv->owner == wsu); /* YYY */
 303    return wv;
 304 }
 305
 306 /* See if wv is contained within wsu.  If so, deallocate wv and return
 307    the index of the already-present copy.  If not, add wv to both the
 308    vec2ix and ix2vec mappings and return its index.
 309 */
 310 static WordSet add_or_dealloc_WordVec( WordSetU* wsu, WordVec* wv_new )
 311 {
 312    Bool     have;
 313    WordVec* wv_old;
 314    UWord/*Set*/ ix_old = -1;
 315    /* Really WordSet, but need something that can safely be casted to
 316       a Word* in the lookupFM.  Making it WordSet (which is 32 bits)
 317       causes failures on a 64-bit platform. */
 318    tl_assert(wv_new->owner == wsu);
 319    have = VG_(lookupFM)( wsu->vec2ix,
 320                          (UWord*)&wv_old, (UWord*)&ix_old,
 321                          (UWord)wv_new );
 322    if (have) {
 323       tl_assert(wv_old != wv_new);
 324       tl_assert(wv_old);
 325       tl_assert(wv_old->owner == wsu);
 326       tl_assert(ix_old < wsu->ix2vec_used);
 327       tl_assert(wsu->ix2vec[ix_old] == wv_old);
 328       delete_WV( wv_new );
 329       return (WordSet)ix_old;
 330    } else if (wsu->ix2vec_free) {
 331       WordSet ws;
 332       tl_assert(is_dead(wsu,(WordVec*)wsu->ix2vec_free));
 333       ws = wsu->ix2vec_free - &(wsu->ix2vec[0]);
 334       tl_assert(wsu->ix2vec[ws] == NULL || is_dead(wsu,wsu->ix2vec[ws]));
 335       wsu->ix2vec_free = (WordVec **) wsu->ix2vec[ws];
 336       wsu->ix2vec[ws] = wv_new;
 337       VG_(addToFM)( wsu->vec2ix, (UWord)wv_new, ws );
 338       if (HG_DEBUG) VG_(printf)("aodW %s re-use free %d %p\n", wsu->cc, (Int)ws, wv_new );
 339       return ws;
 340    } else {
 341       ensure_ix2vec_space( wsu );
 342       tl_assert(wsu->ix2vec);
 343       tl_assert(wsu->ix2vec_used < wsu->ix2vec_size);
 344       wsu->ix2vec[wsu->ix2vec_used] = wv_new;
 345       VG_(addToFM)( wsu->vec2ix, (Word)wv_new, (Word)wsu->ix2vec_used );
 346       if (HG_DEBUG) VG_(printf)("aodW %s %d %p\n", wsu->cc, (Int)wsu->ix2vec_used, wv_new  );
 347       wsu->ix2vec_used++;
 348       tl_assert(wsu->ix2vec_used <= wsu->ix2vec_size);
 349       return (WordSet)(wsu->ix2vec_used - 1);
 350    }
 351 }
 352
 353
 354 WordSetU* HG_(newWordSetU) ( void* (*alloc_nofail)( const HChar*, SizeT ),
 355                              const HChar* cc,
 356                              void  (*dealloc)(void*),
 357                              Word  cacheSize )
 358 {
 359    WordSetU* wsu;
 360    WordVec*  empty;
 361
 362    wsu          = alloc_nofail( cc, sizeof(WordSetU) );
 363    VG_(memset)( wsu, 0, sizeof(WordSetU) );
 364    wsu->alloc   = alloc_nofail;
 365    wsu->cc      = cc;
 366    wsu->dealloc = dealloc;
 367    wsu->vec2ix  = VG_(newFM)( alloc_nofail, cc,
 368                               dealloc, cmp_WordVecs_for_FM );
 369    wsu->ix2vec_used = 0;
 370    wsu->ix2vec_size = 0;
 371    wsu->ix2vec      = NULL;
 372    wsu->ix2vec_free = NULL;
 373    WCache_INIT(wsu->cache_addTo,     cacheSize);
 374    WCache_INIT(wsu->cache_delFrom,   cacheSize);
 375    WCache_INIT(wsu->cache_intersect, cacheSize);
 376    WCache_INIT(wsu->cache_minus,     cacheSize);
 377    empty = new_WV_of_size( wsu, 0 );
 378    wsu->empty = add_or_dealloc_WordVec( wsu, empty );
 379
 380    return wsu;
 381 }
 382
 383 void HG_(deleteWordSetU) ( WordSetU* wsu )
 384 {
 385    void (*dealloc)(void*) = wsu->dealloc;
 386    tl_assert(wsu->vec2ix);
 387    VG_(deleteFM)( wsu->vec2ix, delete_WV_for_FM, NULL/*val-finalizer*/ );
 388    if (wsu->ix2vec)
 389       dealloc(wsu->ix2vec);
 390    dealloc(wsu);
 391 }
 392
 393 WordSet HG_(emptyWS) ( WordSetU* wsu )
 394 {
 395    return wsu->empty;
 396 }
 397
 398 Bool HG_(isEmptyWS) ( WordSetU* wsu, WordSet ws )
 399 {
 400    WordVec* wv = do_ix2vec( wsu, ws );
 401    wsu->n_isEmpty++;
 402    if (wv->size == 0) {
 403       tl_assert(ws == wsu->empty);
 404       return True;
 405    } else {
 406       tl_assert(ws != wsu->empty);
 407       return False;
 408    }
 409 }
 410
 411 Bool HG_(isSingletonWS) ( WordSetU* wsu, WordSet ws, UWord w )
 412 {
 413    WordVec* wv;
 414    tl_assert(wsu);
 415    wsu->n_isSingleton++;
 416    wv = do_ix2vec( wsu, ws );
 417    return (Bool)(wv->size == 1 && wv->words[0] == w);
 418 }
 419
 420 UWord HG_(cardinalityWS) ( WordSetU* wsu, WordSet ws )
 421 {
 422    WordVec* wv;
 423    tl_assert(wsu);
 424    wv = do_ix2vec( wsu, ws );
 425    tl_assert(wv->size >= 0);
 426    return wv->size;
 427 }
 428
 429 UWord HG_(anyElementOfWS) ( WordSetU* wsu, WordSet ws )
 430 {
 431    WordVec* wv;
 432    tl_assert(wsu);
 433    wsu->n_anyElementOf++;
 434    wv = do_ix2vec( wsu, ws );
 435    tl_assert(wv->size >= 1);
 436    return wv->words[0];
 437 }
 438
 439 UWord HG_(cardinalityWSU) ( WordSetU* wsu )
 440 {
 441    tl_assert(wsu);
 442    return wsu->ix2vec_used;
 443 }
 444
 445 void HG_(getPayloadWS) ( /*OUT*/UWord** words, /*OUT*/UWord* nWords,
 446                          WordSetU* wsu, WordSet ws )
 447 {
 448    WordVec* wv;
 449    if (HG_DEBUG) VG_(printf)("getPayloadWS %s %d\n", wsu->cc, (Int)ws);
 450    tl_assert(wsu);
 451    wv = do_ix2vec( wsu, ws );
 452    tl_assert(wv->size >= 0);
 453    *nWords = wv->size;
 454    *words  = wv->words;
 455 }
 456
 457 void HG_(dieWS) ( WordSetU* wsu, WordSet ws )
 458 {
 459    WordVec* wv = do_ix2vec_with_dead( wsu, ws );
 460    WordVec* wv_in_vec2ix;
 461    UWord/*Set*/ wv_ix = -1;
 462
 463    if (HG_DEBUG) VG_(printf)("dieWS %s %d %p\n", wsu->cc, (Int)ws, wv);
 464
 465    if (ws == 0)
 466       return; // we never die the empty set.
 467
 468    if (!wv)
 469       return; // already dead. (or a bug ?).
 470
 471    wsu->n_die++;
 472
 473
 474    wsu->ix2vec[ws] = (WordVec*) wsu->ix2vec_free;
 475    wsu->ix2vec_free = &wsu->ix2vec[ws];
 476
 477    VG_(delFromFM) ( wsu->vec2ix,
 478                     (UWord*)&wv_in_vec2ix, (UWord*)&wv_ix,
 479                     (UWord)wv );
 480
 481    if (HG_DEBUG) VG_(printf)("dieWS wv_ix %d\n", (Int)wv_ix);
 482    tl_assert (wv_ix);
 483    tl_assert (wv_ix == ws);
 484
 485    delete_WV( wv );
 486
 487    wsu->cache_addTo.inUse = 0;
 488    wsu->cache_delFrom.inUse = 0;
 489    wsu->cache_intersect.inUse = 0;
 490    wsu->cache_minus.inUse = 0;
 491 }
 492
 493 Bool HG_(plausibleWS) ( WordSetU* wsu, WordSet ws )
 494 {
 495    if (wsu == NULL) return False;
 496    if (ws < 0 || ws >= wsu->ix2vec_used)
 497       return False;
 498    return True;
 499 }
 500
 501 Bool HG_(saneWS_SLOW) ( WordSetU* wsu, WordSet ws )
 502 {
 503    WordVec* wv;
 504    UWord    i;
 505    if (wsu == NULL) return False;
 506    if (ws < 0 || ws >= wsu->ix2vec_used)
 507       return False;
 508    wv = do_ix2vec( wsu, ws );
 509    /* can never happen .. do_ix2vec will assert instead.  Oh well. */
 510    if (wv->owner != wsu) return False;
 511    if (wv->size < 0) return False;
 512    if (wv->size > 0) {
 513       for (i = 0; i < wv->size-1; i++) {
 514          if (wv->words[i] >= wv->words[i+1])
 515             return False;
 516       }
 517    }
 518    return True;
 519 }
 520
 521 Bool HG_(elemWS) ( WordSetU* wsu, WordSet ws, UWord w )
 522 {
 523    UWord    i;
 524    WordVec* wv = do_ix2vec( wsu, ws );
 525    wsu->n_elem++;
 526    for (i = 0; i < wv->size; i++) {
 527       if (wv->words[i] == w)
 528          return True;
 529    }
 530    return False;
 531 }
 532
 533 WordSet HG_(doubletonWS) ( WordSetU* wsu, UWord w1, UWord w2 )
 534 {
 535    WordVec* wv;
 536    wsu->n_doubleton++;
 537    if (w1 == w2) {
 538       wv = new_WV_of_size(wsu, 1);
 539       wv->words[0] = w1;
 540    }
 541    else if (w1 < w2) {
 542       wv = new_WV_of_size(wsu, 2);
 543       wv->words[0] = w1;
 544       wv->words[1] = w2;
 545    }
 546    else {
 547       tl_assert(w1 > w2);
 548       wv = new_WV_of_size(wsu, 2);
 549       wv->words[0] = w2;
 550       wv->words[1] = w1;
 551    }
 552    return add_or_dealloc_WordVec( wsu, wv );
 553 }
 554
 555 WordSet HG_(singletonWS) ( WordSetU* wsu, UWord w )
 556 {
 557    return HG_(doubletonWS)( wsu, w, w );
 558 }
 559
 560 WordSet HG_(isSubsetOf) ( WordSetU* wsu, WordSet small, WordSet big )
 561 {
 562    wsu->n_isSubsetOf++;
 563    return small == HG_(intersectWS)( wsu, small, big );
 564 }
 565
 566 void HG_(ppWS) ( WordSetU* wsu, WordSet ws )
 567 {
 568    UWord    i;
 569    WordVec* wv;
 570    tl_assert(wsu);
 571    wv = do_ix2vec( wsu, ws );
 572    VG_(printf)("{");
 573    for (i = 0; i < wv->size; i++) {
 574       VG_(printf)("%p", (void*)wv->words[i]);
 575       if (i < wv->size-1)
 576          VG_(printf)(",");
 577    }
 578    VG_(printf)("}");
 579 }
 580
 581 void HG_(ppWSUstats) ( WordSetU* wsu, const HChar* name )
 582 {
 583    VG_(printf)("   WordSet \"%s\":\n", name);
 584    VG_(printf)("      addTo        %10lu (%lu uncached)\n",
 585                wsu->n_add, wsu->n_add_uncached);
 586    VG_(printf)("      delFrom      %10lu (%lu uncached)\n",
 587                wsu->n_del, wsu->n_del_uncached);
 588    VG_(printf)("      union        %10lu\n", wsu->n_union);
 589    VG_(printf)("      intersect    %10lu (%lu uncached) "
 590                "[nb. incl isSubsetOf]\n",
 591                wsu->n_intersect, wsu->n_intersect_uncached);
 592    VG_(printf)("      minus        %10lu (%lu uncached)\n",
 593                wsu->n_minus, wsu->n_minus_uncached);
 594    VG_(printf)("      elem         %10lu\n",   wsu->n_elem);
 595    VG_(printf)("      doubleton    %10lu\n",   wsu->n_doubleton);
 596    VG_(printf)("      isEmpty      %10lu\n",   wsu->n_isEmpty);
 597    VG_(printf)("      isSingleton  %10lu\n",   wsu->n_isSingleton);
 598    VG_(printf)("      anyElementOf %10lu\n",   wsu->n_anyElementOf);
 599    VG_(printf)("      isSubsetOf   %10lu\n",   wsu->n_isSubsetOf);
 600    VG_(printf)("      dieWS        %10lu\n",   wsu->n_die);
 601 }
 602
 603 WordSet HG_(addToWS) ( WordSetU* wsu, WordSet ws, UWord w )
 604 {
 605    UWord    k, j;
 606    WordVec* wv_new;
 607    WordVec* wv;
 608    WordSet  result = (WordSet)(-1); /* bogus */
 609
 610    wsu->n_add++;
 611    WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_addTo, ws, w);
 612    wsu->n_add_uncached++;
 613
 614    /* If already present, this is a no-op. */
 615    wv = do_ix2vec( wsu, ws );
 616    for (k = 0; k < wv->size; k++) {
 617       if (wv->words[k] == w) {
 618          result = ws;
 619          goto out;
 620       }
 621    }
 622    /* Ok, not present.  Build a new one ... */
 623    wv_new = new_WV_of_size( wsu, wv->size + 1 );
 624    k = j = 0;
 625    for (; k < wv->size && wv->words[k] < w; k++) {
 626       wv_new->words[j++] = wv->words[k];
 627    }
 628    wv_new->words[j++] = w;
 629    for (; k < wv->size; k++) {
 630       tl_assert(wv->words[k] > w);
 631       wv_new->words[j++] = wv->words[k];
 632    }
 633    tl_assert(j == wv_new->size);
 634
 635    /* Find any existing copy, or add the new one. */
 636    result = add_or_dealloc_WordVec( wsu, wv_new );
 637    tl_assert(result != (WordSet)(-1));
 638
 639   out:
 640    WCache_UPDATE(wsu->cache_addTo, ws, w, result);
 641    return result;
 642 }
 643
 644 WordSet HG_(delFromWS) ( WordSetU* wsu, WordSet ws, UWord w )
 645 {
 646    UWord    i, j, k;
 647    WordVec* wv_new;
 648    WordSet  result = (WordSet)(-1); /* bogus */
 649    WordVec* wv = do_ix2vec( wsu, ws );
 650
 651    wsu->n_del++;
 652
 653    /* special case empty set */
 654    if (wv->size == 0) {
 655       tl_assert(ws == wsu->empty);
 656       return ws;
 657    }
 658
 659    WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_delFrom, ws, w);
 660    wsu->n_del_uncached++;
 661
 662    /* If not already present, this is a no-op. */
 663    for (i = 0; i < wv->size; i++) {
 664       if (wv->words[i] == w)
 665          break;
 666    }
 667    if (i == wv->size) {
 668       result = ws;
 669       goto out;
 670    }
 671    /* So w is present in ws, and the new set will be one element
 672       smaller. */
 673    tl_assert(i >= 0 && i < wv->size);
 674    tl_assert(wv->size > 0);
 675
 676    wv_new = new_WV_of_size( wsu, wv->size - 1 );
 677    j = k = 0;
 678    for (; j < wv->size; j++) {
 679       if (j == i)
 680          continue;
 681       wv_new->words[k++] = wv->words[j];
 682    }
 683    tl_assert(k == wv_new->size);
 684
 685    result = add_or_dealloc_WordVec( wsu, wv_new );
 686    if (wv->size == 1) {
 687       tl_assert(result == wsu->empty);
 688    }
 689
 690   out:
 691    WCache_UPDATE(wsu->cache_delFrom, ws, w, result);
 692    return result;
 693 }
 694
 695 WordSet HG_(unionWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 )
 696 {
 697    UWord    i1, i2, k, sz;
 698    WordVec* wv_new;
 699    WordVec* wv1 = do_ix2vec( wsu, ws1 );
 700    WordVec* wv2 = do_ix2vec( wsu, ws2 );
 701    wsu->n_union++;
 702    sz = 0;
 703    i1 = i2 = 0;
 704    while (1) {
 705       if (i1 >= wv1->size || i2 >= wv2->size)
 706          break;
 707       sz++;
 708       if (wv1->words[i1] < wv2->words[i2]) {
 709          i1++;
 710       } else
 711       if (wv1->words[i1] > wv2->words[i2]) {
 712          i2++;
 713       } else {
 714          i1++;
 715          i2++;
 716       }
 717    }
 718    tl_assert(i1 <= wv1->size);
 719    tl_assert(i2 <= wv2->size);
 720    tl_assert(i1 == wv1->size || i2 == wv2->size);
 721    if (i1 == wv1->size && i2 < wv2->size) {
 722       sz += (wv2->size - i2);
 723    }
 724    if (i2 == wv2->size && i1 < wv1->size) {
 725       sz += (wv1->size - i1);
 726    }
 727
 728    wv_new = new_WV_of_size( wsu, sz );
 729    k = 0;
 730
 731    i1 = i2 = 0;
 732    while (1) {
 733       if (i1 >= wv1->size || i2 >= wv2->size)
 734          break;
 735       if (wv1->words[i1] < wv2->words[i2]) {
 736          wv_new->words[k++] = wv1->words[i1];
 737          i1++;
 738       } else
 739       if (wv1->words[i1] > wv2->words[i2]) {
 740          wv_new->words[k++] = wv2->words[i2];
 741          i2++;
 742       } else {
 743          wv_new->words[k++] = wv1->words[i1];
 744          i1++;
 745          i2++;
 746       }
 747    }
 748    tl_assert(i1 <= wv1->size);
 749    tl_assert(i2 <= wv2->size);
 750    tl_assert(i1 == wv1->size || i2 == wv2->size);
 751    if (i1 == wv1->size && i2 < wv2->size) {
 752       while (i2 < wv2->size)
 753          wv_new->words[k++] = wv2->words[i2++];
 754    }
 755    if (i2 == wv2->size && i1 < wv1->size) {
 756       while (i1 < wv1->size)
 757          wv_new->words[k++] = wv1->words[i1++];
 758    }
 759
 760    tl_assert(k == sz);
 761
 762    return add_or_dealloc_WordVec( wsu, wv_new );
 763 }
 764
 765 WordSet HG_(intersectWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 )
 766 {
 767    UWord    i1, i2, k, sz;
 768    WordSet  ws_new = (WordSet)(-1); /* bogus */
 769    WordVec* wv_new;
 770    WordVec* wv1;
 771    WordVec* wv2;
 772
 773    wsu->n_intersect++;
 774
 775    /* Deal with an obvious case fast. */
 776    if (ws1 == ws2)
 777       return ws1;
 778
 779    /* Since intersect(x,y) == intersect(y,x), convert both variants to
 780       the same query.  This reduces the number of variants the cache
 781       has to deal with. */
 782    if (ws1 > ws2) {
 783       WordSet wst = ws1; ws1 = ws2; ws2 = wst;
 784    }
 785
 786    WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_intersect, ws1, ws2);
 787    wsu->n_intersect_uncached++;
 788
 789    wv1 = do_ix2vec( wsu, ws1 );
 790    wv2 = do_ix2vec( wsu, ws2 );
 791    sz = 0;
 792    i1 = i2 = 0;
 793    while (1) {
 794       if (i1 >= wv1->size || i2 >= wv2->size)
 795          break;
 796       if (wv1->words[i1] < wv2->words[i2]) {
 797          i1++;
 798       } else
 799       if (wv1->words[i1] > wv2->words[i2]) {
 800          i2++;
 801       } else {
 802          sz++;
 803          i1++;
 804          i2++;
 805       }
 806    }
 807    tl_assert(i1 <= wv1->size);
 808    tl_assert(i2 <= wv2->size);
 809    tl_assert(i1 == wv1->size || i2 == wv2->size);
 810
 811    wv_new = new_WV_of_size( wsu, sz );
 812    k = 0;
 813
 814    i1 = i2 = 0;
 815    while (1) {
 816       if (i1 >= wv1->size || i2 >= wv2->size)
 817          break;
 818       if (wv1->words[i1] < wv2->words[i2]) {
 819          i1++;
 820       } else
 821       if (wv1->words[i1] > wv2->words[i2]) {
 822          i2++;
 823       } else {
 824          wv_new->words[k++] = wv1->words[i1];
 825          i1++;
 826          i2++;
 827       }
 828    }
 829    tl_assert(i1 <= wv1->size);
 830    tl_assert(i2 <= wv2->size);
 831    tl_assert(i1 == wv1->size || i2 == wv2->size);
 832
 833    tl_assert(k == sz);
 834
 835    ws_new = add_or_dealloc_WordVec( wsu, wv_new );
 836    if (sz == 0) {
 837       tl_assert(ws_new == wsu->empty);
 838    }
 839
 840    tl_assert(ws_new != (WordSet)(-1));
 841    WCache_UPDATE(wsu->cache_intersect, ws1, ws2, ws_new);
 842
 843    return ws_new;
 844 }
 845
 846 WordSet HG_(minusWS) ( WordSetU* wsu, WordSet ws1, WordSet ws2 )
 847 {
 848    UWord    i1, i2, k, sz;
 849    WordSet  ws_new = (WordSet)(-1); /* bogus */
 850    WordVec* wv_new;
 851    WordVec* wv1;
 852    WordVec* wv2;
 853
 854    wsu->n_minus++;
 855    WCache_LOOKUP_AND_RETURN(WordSet, wsu->cache_minus, ws1, ws2);
 856    wsu->n_minus_uncached++;
 857
 858    wv1 = do_ix2vec( wsu, ws1 );
 859    wv2 = do_ix2vec( wsu, ws2 );
 860    sz = 0;
 861    i1 = i2 = 0;
 862    while (1) {
 863       if (i1 >= wv1->size || i2 >= wv2->size)
 864          break;
 865       if (wv1->words[i1] < wv2->words[i2]) {
 866          sz++;
 867          i1++;
 868       } else
 869       if (wv1->words[i1] > wv2->words[i2]) {
 870          i2++;
 871       } else {
 872          i1++;
 873          i2++;
 874       }
 875    }
 876    tl_assert(i1 <= wv1->size);
 877    tl_assert(i2 <= wv2->size);
 878    tl_assert(i1 == wv1->size || i2 == wv2->size);
 879    if (i2 == wv2->size && i1 < wv1->size) {
 880       sz += (wv1->size - i1);
 881    }
 882
 883    wv_new = new_WV_of_size( wsu, sz );
 884    k = 0;
 885
 886    i1 = i2 = 0;
 887    while (1) {
 888       if (i1 >= wv1->size || i2 >= wv2->size)
 889          break;
 890       if (wv1->words[i1] < wv2->words[i2]) {
 891          wv_new->words[k++] = wv1->words[i1];
 892          i1++;
 893       } else
 894       if (wv1->words[i1] > wv2->words[i2]) {
 895          i2++;
 896       } else {
 897          i1++;
 898          i2++;
 899       }
 900    }
 901    tl_assert(i1 <= wv1->size);
 902    tl_assert(i2 <= wv2->size);
 903    tl_assert(i1 == wv1->size || i2 == wv2->size);
 904    if (i2 == wv2->size && i1 < wv1->size) {
 905       while (i1 < wv1->size)
 906          wv_new->words[k++] = wv1->words[i1++];
 907    }
 908
 909    tl_assert(k == sz);
 910
 911    ws_new = add_or_dealloc_WordVec( wsu, wv_new );
 912    if (sz == 0) {
 913       tl_assert(ws_new == wsu->empty);
 914    }
 915
 916    tl_assert(ws_new != (WordSet)(-1));
 917    WCache_UPDATE(wsu->cache_minus, ws1, ws2, ws_new);
 918
 919    return ws_new;
 920 }
 921
 922 static __attribute__((unused))
 923 void show_WS ( WordSetU* wsu, WordSet ws )
 924 {
 925    UWord i;
 926    WordVec* wv = do_ix2vec( wsu, ws );
 927    VG_(printf)("#%u{", ws);
 928    for (i = 0; i < wv->size; i++) {
 929       VG_(printf)("%lu", wv->words[i]);
 930       if (i < wv->size-1)
 931          VG_(printf)(",");
 932    }
 933    VG_(printf)("}\n");
 934 }
 935
 936 //------------------------------------------------------------------//
 937 //---                        end WordSet                         ---//
 938 //---                       Implementation                       ---//
 939 //------------------------------------------------------------------//
 940
 941 /*--------------------------------------------------------------------*/
 942 /*--- end                                             hg_wordset.c ---*/
 943 /*--------------------------------------------------------------------*/