contrib/libpcap/optimize.c

   1 /*
   2  * Copyright (c) 1988, 1989, 1990, 1991, 1993, 1994, 1995, 1996
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that: (1) source code distributions
   7  * retain the above copyright notice and this paragraph in its entirety, (2)
   8  * distributions including binary code include the above copyright notice and
   9  * this paragraph in its entirety in the documentation or other materials
  10  * provided with the distribution, and (3) all advertising materials mentioning
  11  * features or use of this software display the following acknowledgement:
  12  * ``This product includes software developed by the University of California,
  13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
  14  * the University nor the names of its contributors may be used to endorse
  15  * or promote products derived from this software without specific prior
  16  * written permission.
  17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
  18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
  19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
  20  *
  21  *  Optimization module for tcpdump intermediate representation.
  22  */
  23
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif
  27
  28 #ifdef _WIN32
  29 #include <pcap-stdinc.h>
  30 #else /* _WIN32 */
  31 #if HAVE_INTTYPES_H
  32 #include <inttypes.h>
  33 #elif HAVE_STDINT_H
  34 #include <stdint.h>
  35 #endif
  36 #ifdef HAVE_SYS_BITYPES_H
  37 #include <sys/bitypes.h>
  38 #endif
  39 #include <sys/types.h>
  40 #endif /* _WIN32 */
  41
  42 #include <stdio.h>
  43 #include <stdlib.h>
  44 #include <memory.h>
  45 #include <string.h>
  46
  47 #include <errno.h>
  48
  49 #include "pcap-int.h"
  50
  51 #include "gencode.h"
  52
  53 #ifdef HAVE_OS_PROTO_H
  54 #include "os-proto.h"
  55 #endif
  56
  57 #ifdef BDEBUG
  58 int pcap_optimizer_debug;
  59 #endif
  60
  61 #if defined(MSDOS) && !defined(__DJGPP__)
  62 extern int _w32_ffs (int mask);
  63 #define ffs _w32_ffs
  64 #endif
  65
  66 /*
  67  * So is the check for _MSC_VER done because MinGW has this?
  68  */
  69 #if defined(_WIN32) && defined (_MSC_VER)
  70 /*
  71  * ffs -- vax ffs instruction
  72  *
  73  * XXX - with versions of VS that have it, use _BitScanForward()?
  74  */
  75 static int
  76 ffs(int mask)
  77 {
  78         int bit;
  79
  80         if (mask == 0)
  81                 return(0);
  82         for (bit = 1; !(mask & 1); bit++)
  83                 mask >>= 1;
  84         return(bit);
  85 }
  86 #endif
  87
  88 /*
  89  * Represents a deleted instruction.
  90  */
  91 #define NOP -1
  92
  93 /*
  94  * Register numbers for use-def values.
  95  * 0 through BPF_MEMWORDS-1 represent the corresponding scratch memory
  96  * location.  A_ATOM is the accumulator and X_ATOM is the index
  97  * register.
  98  */
  99 #define A_ATOM BPF_MEMWORDS
 100 #define X_ATOM (BPF_MEMWORDS+1)
 101
 102 /*
 103  * This define is used to represent *both* the accumulator and
 104  * x register in use-def computations.
 105  * Currently, the use-def code assumes only one definition per instruction.
 106  */
 107 #define AX_ATOM N_ATOMS
 108
 109 /*
 110  * These data structures are used in a Cocke and Shwarz style
 111  * value numbering scheme.  Since the flowgraph is acyclic,
 112  * exit values can be propagated from a node's predecessors
 113  * provided it is uniquely defined.
 114  */
 115 struct valnode {
 116         int code;
 117         int v0, v1;
 118         int val;
 119         struct valnode *next;
 120 };
 121
 122 /* Integer constants mapped with the load immediate opcode. */
 123 #define K(i) F(opt_state, BPF_LD|BPF_IMM|BPF_W, i, 0L)
 124
 125 struct vmapinfo {
 126         int is_const;
 127         bpf_int32 const_val;
 128 };
 129
 130 struct _opt_state {
 131         /*
 132          * A flag to indicate that further optimization is needed.
 133          * Iterative passes are continued until a given pass yields no
 134          * branch movement.
 135          */
 136         int done;
 137
 138         int n_blocks;
 139         struct block **blocks;
 140         int n_edges;
 141         struct edge **edges;
 142
 143         /*
 144          * A bit vector set representation of the dominators.
 145          * We round up the set size to the next power of two.
 146          */
 147         int nodewords;
 148         int edgewords;
 149         struct block **levels;
 150         bpf_u_int32 *space;
 151
 152 #define BITS_PER_WORD (8*sizeof(bpf_u_int32))
 153 /*
 154  * True if a is in uset {p}
 155  */
 156 #define SET_MEMBER(p, a) \
 157 ((p)[(unsigned)(a) / BITS_PER_WORD] & (1 << ((unsigned)(a) % BITS_PER_WORD)))
 158
 159 /*
 160  * Add 'a' to uset p.
 161  */
 162 #define SET_INSERT(p, a) \
 163 (p)[(unsigned)(a) / BITS_PER_WORD] |= (1 << ((unsigned)(a) % BITS_PER_WORD))
 164
 165 /*
 166  * Delete 'a' from uset p.
 167  */
 168 #define SET_DELETE(p, a) \
 169 (p)[(unsigned)(a) / BITS_PER_WORD] &= ~(1 << ((unsigned)(a) % BITS_PER_WORD))
 170
 171 /*
 172  * a := a intersect b
 173  */
 174 #define SET_INTERSECT(a, b, n)\
 175 {\
 176         register bpf_u_int32 *_x = a, *_y = b;\
 177         register int _n = n;\
 178         while (--_n >= 0) *_x++ &= *_y++;\
 179 }
 180
 181 /*
 182  * a := a - b
 183  */
 184 #define SET_SUBTRACT(a, b, n)\
 185 {\
 186         register bpf_u_int32 *_x = a, *_y = b;\
 187         register int _n = n;\
 188         while (--_n >= 0) *_x++ &=~ *_y++;\
 189 }
 190
 191 /*
 192  * a := a union b
 193  */
 194 #define SET_UNION(a, b, n)\
 195 {\
 196         register bpf_u_int32 *_x = a, *_y = b;\
 197         register int _n = n;\
 198         while (--_n >= 0) *_x++ |= *_y++;\
 199 }
 200
 201         uset all_dom_sets;
 202         uset all_closure_sets;
 203         uset all_edge_sets;
 204
 205 #define MODULUS 213
 206         struct valnode *hashtbl[MODULUS];
 207         int curval;
 208         int maxval;
 209
 210         struct vmapinfo *vmap;
 211         struct valnode *vnode_base;
 212         struct valnode *next_vnode;
 213 };
 214
 215 typedef struct {
 216         /*
 217          * Some pointers used to convert the basic block form of the code,
 218          * into the array form that BPF requires.  'fstart' will point to
 219          * the malloc'd array while 'ftail' is used during the recursive
 220          * traversal.
 221          */
 222         struct bpf_insn *fstart;
 223         struct bpf_insn *ftail;
 224 } conv_state_t;
 225
 226 static void opt_init(compiler_state_t *, opt_state_t *, struct icode *);
 227 static void opt_cleanup(opt_state_t *);
 228
 229 static void intern_blocks(opt_state_t *, struct icode *);
 230
 231 static void find_inedges(opt_state_t *, struct block *);
 232 #ifdef BDEBUG
 233 static void opt_dump(compiler_state_t *, struct icode *);
 234 #endif
 235
 236 #ifndef MAX
 237 #define MAX(a,b) ((a)>(b)?(a):(b))
 238 #endif
 239
 240 static void
 241 find_levels_r(opt_state_t *opt_state, struct icode *ic, struct block *b)
 242 {
 243         int level;
 244
 245         if (isMarked(ic, b))
 246                 return;
 247
 248         Mark(ic, b);
 249         b->link = 0;
 250
 251         if (JT(b)) {
 252                 find_levels_r(opt_state, ic, JT(b));
 253                 find_levels_r(opt_state, ic, JF(b));
 254                 level = MAX(JT(b)->level, JF(b)->level) + 1;
 255         } else
 256                 level = 0;
 257         b->level = level;
 258         b->link = opt_state->levels[level];
 259         opt_state->levels[level] = b;
 260 }
 261
 262 /*
 263  * Level graph.  The levels go from 0 at the leaves to
 264  * N_LEVELS at the root.  The opt_state->levels[] array points to the
 265  * first node of the level list, whose elements are linked
 266  * with the 'link' field of the struct block.
 267  */
 268 static void
 269 find_levels(opt_state_t *opt_state, struct icode *ic)
 270 {
 271         memset((char *)opt_state->levels, 0, opt_state->n_blocks * sizeof(*opt_state->levels));
 272         unMarkAll(ic);
 273         find_levels_r(opt_state, ic, ic->root);
 274 }
 275
 276 /*
 277  * Find dominator relationships.
 278  * Assumes graph has been leveled.
 279  */
 280 static void
 281 find_dom(opt_state_t *opt_state, struct block *root)
 282 {
 283         int i;
 284         struct block *b;
 285         bpf_u_int32 *x;
 286
 287         /*
 288          * Initialize sets to contain all nodes.
 289          */
 290         x = opt_state->all_dom_sets;
 291         i = opt_state->n_blocks * opt_state->nodewords;
 292         while (--i >= 0)
 293                 *x++ = ~0;
 294         /* Root starts off empty. */
 295         for (i = opt_state->nodewords; --i >= 0;)
 296                 root->dom[i] = 0;
 297
 298         /* root->level is the highest level no found. */
 299         for (i = root->level; i >= 0; --i) {
 300                 for (b = opt_state->levels[i]; b; b = b->link) {
 301                         SET_INSERT(b->dom, b->id);
 302                         if (JT(b) == 0)
 303                                 continue;
 304                         SET_INTERSECT(JT(b)->dom, b->dom, opt_state->nodewords);
 305                         SET_INTERSECT(JF(b)->dom, b->dom, opt_state->nodewords);
 306                 }
 307         }
 308 }
 309
 310 static void
 311 propedom(opt_state_t *opt_state, struct edge *ep)
 312 {
 313         SET_INSERT(ep->edom, ep->id);
 314         if (ep->succ) {
 315                 SET_INTERSECT(ep->succ->et.edom, ep->edom, opt_state->edgewords);
 316                 SET_INTERSECT(ep->succ->ef.edom, ep->edom, opt_state->edgewords);
 317         }
 318 }
 319
 320 /*
 321  * Compute edge dominators.
 322  * Assumes graph has been leveled and predecessors established.
 323  */
 324 static void
 325 find_edom(opt_state_t *opt_state, struct block *root)
 326 {
 327         int i;
 328         uset x;
 329         struct block *b;
 330
 331         x = opt_state->all_edge_sets;
 332         for (i = opt_state->n_edges * opt_state->edgewords; --i >= 0; )
 333                 x[i] = ~0;
 334
 335         /* root->level is the highest level no found. */
 336         memset(root->et.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
 337         memset(root->ef.edom, 0, opt_state->edgewords * sizeof(*(uset)0));
 338         for (i = root->level; i >= 0; --i) {
 339                 for (b = opt_state->levels[i]; b != 0; b = b->link) {
 340                         propedom(opt_state, &b->et);
 341                         propedom(opt_state, &b->ef);
 342                 }
 343         }
 344 }
 345
 346 /*
 347  * Find the backwards transitive closure of the flow graph.  These sets
 348  * are backwards in the sense that we find the set of nodes that reach
 349  * a given node, not the set of nodes that can be reached by a node.
 350  *
 351  * Assumes graph has been leveled.
 352  */
 353 static void
 354 find_closure(opt_state_t *opt_state, struct block *root)
 355 {
 356         int i;
 357         struct block *b;
 358
 359         /*
 360          * Initialize sets to contain no nodes.
 361          */
 362         memset((char *)opt_state->all_closure_sets, 0,
 363               opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->all_closure_sets));
 364
 365         /* root->level is the highest level no found. */
 366         for (i = root->level; i >= 0; --i) {
 367                 for (b = opt_state->levels[i]; b; b = b->link) {
 368                         SET_INSERT(b->closure, b->id);
 369                         if (JT(b) == 0)
 370                                 continue;
 371                         SET_UNION(JT(b)->closure, b->closure, opt_state->nodewords);
 372                         SET_UNION(JF(b)->closure, b->closure, opt_state->nodewords);
 373                 }
 374         }
 375 }
 376
 377 /*
 378  * Return the register number that is used by s.  If A and X are both
 379  * used, return AX_ATOM.  If no register is used, return -1.
 380  *
 381  * The implementation should probably change to an array access.
 382  */
 383 static int
 384 atomuse(struct stmt *s)
 385 {
 386         register int c = s->code;
 387
 388         if (c == NOP)
 389                 return -1;
 390
 391         switch (BPF_CLASS(c)) {
 392
 393         case BPF_RET:
 394                 return (BPF_RVAL(c) == BPF_A) ? A_ATOM :
 395                         (BPF_RVAL(c) == BPF_X) ? X_ATOM : -1;
 396
 397         case BPF_LD:
 398         case BPF_LDX:
 399                 return (BPF_MODE(c) == BPF_IND) ? X_ATOM :
 400                         (BPF_MODE(c) == BPF_MEM) ? s->k : -1;
 401
 402         case BPF_ST:
 403                 return A_ATOM;
 404
 405         case BPF_STX:
 406                 return X_ATOM;
 407
 408         case BPF_JMP:
 409         case BPF_ALU:
 410                 if (BPF_SRC(c) == BPF_X)
 411                         return AX_ATOM;
 412                 return A_ATOM;
 413
 414         case BPF_MISC:
 415                 return BPF_MISCOP(c) == BPF_TXA ? X_ATOM : A_ATOM;
 416         }
 417         abort();
 418         /* NOTREACHED */
 419 }
 420
 421 /*
 422  * Return the register number that is defined by 's'.  We assume that
 423  * a single stmt cannot define more than one register.  If no register
 424  * is defined, return -1.
 425  *
 426  * The implementation should probably change to an array access.
 427  */
 428 static int
 429 atomdef(struct stmt *s)
 430 {
 431         if (s->code == NOP)
 432                 return -1;
 433
 434         switch (BPF_CLASS(s->code)) {
 435
 436         case BPF_LD:
 437         case BPF_ALU:
 438                 return A_ATOM;
 439
 440         case BPF_LDX:
 441                 return X_ATOM;
 442
 443         case BPF_ST:
 444         case BPF_STX:
 445                 return s->k;
 446
 447         case BPF_MISC:
 448                 return BPF_MISCOP(s->code) == BPF_TAX ? X_ATOM : A_ATOM;
 449         }
 450         return -1;
 451 }
 452
 453 /*
 454  * Compute the sets of registers used, defined, and killed by 'b'.
 455  *
 456  * "Used" means that a statement in 'b' uses the register before any
 457  * statement in 'b' defines it, i.e. it uses the value left in
 458  * that register by a predecessor block of this block.
 459  * "Defined" means that a statement in 'b' defines it.
 460  * "Killed" means that a statement in 'b' defines it before any
 461  * statement in 'b' uses it, i.e. it kills the value left in that
 462  * register by a predecessor block of this block.
 463  */
 464 static void
 465 compute_local_ud(struct block *b)
 466 {
 467         struct slist *s;
 468         atomset def = 0, use = 0, killed = 0;
 469         int atom;
 470
 471         for (s = b->stmts; s; s = s->next) {
 472                 if (s->s.code == NOP)
 473                         continue;
 474                 atom = atomuse(&s->s);
 475                 if (atom >= 0) {
 476                         if (atom == AX_ATOM) {
 477                                 if (!ATOMELEM(def, X_ATOM))
 478                                         use |= ATOMMASK(X_ATOM);
 479                                 if (!ATOMELEM(def, A_ATOM))
 480                                         use |= ATOMMASK(A_ATOM);
 481                         }
 482                         else if (atom < N_ATOMS) {
 483                                 if (!ATOMELEM(def, atom))
 484                                         use |= ATOMMASK(atom);
 485                         }
 486                         else
 487                                 abort();
 488                 }
 489                 atom = atomdef(&s->s);
 490                 if (atom >= 0) {
 491                         if (!ATOMELEM(use, atom))
 492                                 killed |= ATOMMASK(atom);
 493                         def |= ATOMMASK(atom);
 494                 }
 495         }
 496         if (BPF_CLASS(b->s.code) == BPF_JMP) {
 497                 /*
 498                  * XXX - what about RET?
 499                  */
 500                 atom = atomuse(&b->s);
 501                 if (atom >= 0) {
 502                         if (atom == AX_ATOM) {
 503                                 if (!ATOMELEM(def, X_ATOM))
 504                                         use |= ATOMMASK(X_ATOM);
 505                                 if (!ATOMELEM(def, A_ATOM))
 506                                         use |= ATOMMASK(A_ATOM);
 507                         }
 508                         else if (atom < N_ATOMS) {
 509                                 if (!ATOMELEM(def, atom))
 510                                         use |= ATOMMASK(atom);
 511                         }
 512                         else
 513                                 abort();
 514                 }
 515         }
 516
 517         b->def = def;
 518         b->kill = killed;
 519         b->in_use = use;
 520 }
 521
 522 /*
 523  * Assume graph is already leveled.
 524  */
 525 static void
 526 find_ud(opt_state_t *opt_state, struct block *root)
 527 {
 528         int i, maxlevel;
 529         struct block *p;
 530
 531         /*
 532          * root->level is the highest level no found;
 533          * count down from there.
 534          */
 535         maxlevel = root->level;
 536         for (i = maxlevel; i >= 0; --i)
 537                 for (p = opt_state->levels[i]; p; p = p->link) {
 538                         compute_local_ud(p);
 539                         p->out_use = 0;
 540                 }
 541
 542         for (i = 1; i <= maxlevel; ++i) {
 543                 for (p = opt_state->levels[i]; p; p = p->link) {
 544                         p->out_use |= JT(p)->in_use | JF(p)->in_use;
 545                         p->in_use |= p->out_use &~ p->kill;
 546                 }
 547         }
 548 }
 549 static void
 550 init_val(opt_state_t *opt_state)
 551 {
 552         opt_state->curval = 0;
 553         opt_state->next_vnode = opt_state->vnode_base;
 554         memset((char *)opt_state->vmap, 0, opt_state->maxval * sizeof(*opt_state->vmap));
 555         memset((char *)opt_state->hashtbl, 0, sizeof opt_state->hashtbl);
 556 }
 557
 558 /* Because we really don't have an IR, this stuff is a little messy. */
 559 static int
 560 F(opt_state_t *opt_state, int code, int v0, int v1)
 561 {
 562         u_int hash;
 563         int val;
 564         struct valnode *p;
 565
 566         hash = (u_int)code ^ (v0 << 4) ^ (v1 << 8);
 567         hash %= MODULUS;
 568
 569         for (p = opt_state->hashtbl[hash]; p; p = p->next)
 570                 if (p->code == code && p->v0 == v0 && p->v1 == v1)
 571                         return p->val;
 572
 573         val = ++opt_state->curval;
 574         if (BPF_MODE(code) == BPF_IMM &&
 575             (BPF_CLASS(code) == BPF_LD || BPF_CLASS(code) == BPF_LDX)) {
 576                 opt_state->vmap[val].const_val = v0;
 577                 opt_state->vmap[val].is_const = 1;
 578         }
 579         p = opt_state->next_vnode++;
 580         p->val = val;
 581         p->code = code;
 582         p->v0 = v0;
 583         p->v1 = v1;
 584         p->next = opt_state->hashtbl[hash];
 585         opt_state->hashtbl[hash] = p;
 586
 587         return val;
 588 }
 589
 590 static inline void
 591 vstore(struct stmt *s, int *valp, int newval, int alter)
 592 {
 593         if (alter && *valp == newval)
 594                 s->code = NOP;
 595         else
 596                 *valp = newval;
 597 }
 598
 599 /*
 600  * Do constant-folding on binary operators.
 601  * (Unary operators are handled elsewhere.)
 602  */
 603 static void
 604 fold_op(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
 605     struct stmt *s, int v0, int v1)
 606 {
 607         bpf_u_int32 a, b;
 608
 609         a = opt_state->vmap[v0].const_val;
 610         b = opt_state->vmap[v1].const_val;
 611
 612         switch (BPF_OP(s->code)) {
 613         case BPF_ADD:
 614                 a += b;
 615                 break;
 616
 617         case BPF_SUB:
 618                 a -= b;
 619                 break;
 620
 621         case BPF_MUL:
 622                 a *= b;
 623                 break;
 624
 625         case BPF_DIV:
 626                 if (b == 0)
 627                         bpf_error(cstate, "division by zero");
 628                 a /= b;
 629                 break;
 630
 631         case BPF_MOD:
 632                 if (b == 0)
 633                         bpf_error(cstate, "modulus by zero");
 634                 a %= b;
 635                 break;
 636
 637         case BPF_AND:
 638                 a &= b;
 639                 break;
 640
 641         case BPF_OR:
 642                 a |= b;
 643                 break;
 644
 645         case BPF_XOR:
 646                 a ^= b;
 647                 break;
 648
 649         case BPF_LSH:
 650                 a <<= b;
 651                 break;
 652
 653         case BPF_RSH:
 654                 a >>= b;
 655                 break;
 656
 657         default:
 658                 abort();
 659         }
 660         s->k = a;
 661         s->code = BPF_LD|BPF_IMM;
 662         opt_state->done = 0;
 663 }
 664
 665 static inline struct slist *
 666 this_op(struct slist *s)
 667 {
 668         while (s != 0 && s->s.code == NOP)
 669                 s = s->next;
 670         return s;
 671 }
 672
 673 static void
 674 opt_not(struct block *b)
 675 {
 676         struct block *tmp = JT(b);
 677
 678         JT(b) = JF(b);
 679         JF(b) = tmp;
 680 }
 681
 682 static void
 683 opt_peep(opt_state_t *opt_state, struct block *b)
 684 {
 685         struct slist *s;
 686         struct slist *next, *last;
 687         int val;
 688
 689         s = b->stmts;
 690         if (s == 0)
 691                 return;
 692
 693         last = s;
 694         for (/*empty*/; /*empty*/; s = next) {
 695                 /*
 696                  * Skip over nops.
 697                  */
 698                 s = this_op(s);
 699                 if (s == 0)
 700                         break;  /* nothing left in the block */
 701
 702                 /*
 703                  * Find the next real instruction after that one
 704                  * (skipping nops).
 705                  */
 706                 next = this_op(s->next);
 707                 if (next == 0)
 708                         break;  /* no next instruction */
 709                 last = next;
 710
 711                 /*
 712                  * st  M[k]     -->     st  M[k]
 713                  * ldx M[k]             tax
 714                  */
 715                 if (s->s.code == BPF_ST &&
 716                     next->s.code == (BPF_LDX|BPF_MEM) &&
 717                     s->s.k == next->s.k) {
 718                         opt_state->done = 0;
 719                         next->s.code = BPF_MISC|BPF_TAX;
 720                 }
 721                 /*
 722                  * ld  #k       -->     ldx  #k
 723                  * tax                  txa
 724                  */
 725                 if (s->s.code == (BPF_LD|BPF_IMM) &&
 726                     next->s.code == (BPF_MISC|BPF_TAX)) {
 727                         s->s.code = BPF_LDX|BPF_IMM;
 728                         next->s.code = BPF_MISC|BPF_TXA;
 729                         opt_state->done = 0;
 730                 }
 731                 /*
 732                  * This is an ugly special case, but it happens
 733                  * when you say tcp[k] or udp[k] where k is a constant.
 734                  */
 735                 if (s->s.code == (BPF_LD|BPF_IMM)) {
 736                         struct slist *add, *tax, *ild;
 737
 738                         /*
 739                          * Check that X isn't used on exit from this
 740                          * block (which the optimizer might cause).
 741                          * We know the code generator won't generate
 742                          * any local dependencies.
 743                          */
 744                         if (ATOMELEM(b->out_use, X_ATOM))
 745                                 continue;
 746
 747                         /*
 748                          * Check that the instruction following the ldi
 749                          * is an addx, or it's an ldxms with an addx
 750                          * following it (with 0 or more nops between the
 751                          * ldxms and addx).
 752                          */
 753                         if (next->s.code != (BPF_LDX|BPF_MSH|BPF_B))
 754                                 add = next;
 755                         else
 756                                 add = this_op(next->next);
 757                         if (add == 0 || add->s.code != (BPF_ALU|BPF_ADD|BPF_X))
 758                                 continue;
 759
 760                         /*
 761                          * Check that a tax follows that (with 0 or more
 762                          * nops between them).
 763                          */
 764                         tax = this_op(add->next);
 765                         if (tax == 0 || tax->s.code != (BPF_MISC|BPF_TAX))
 766                                 continue;
 767
 768                         /*
 769                          * Check that an ild follows that (with 0 or more
 770                          * nops between them).
 771                          */
 772                         ild = this_op(tax->next);
 773                         if (ild == 0 || BPF_CLASS(ild->s.code) != BPF_LD ||
 774                             BPF_MODE(ild->s.code) != BPF_IND)
 775                                 continue;
 776                         /*
 777                          * We want to turn this sequence:
 778                          *
 779                          * (004) ldi     #0x2           {s}
 780                          * (005) ldxms   [14]           {next}  -- optional
 781                          * (006) addx                   {add}
 782                          * (007) tax                    {tax}
 783                          * (008) ild     [x+0]          {ild}
 784                          *
 785                          * into this sequence:
 786                          *
 787                          * (004) nop
 788                          * (005) ldxms   [14]
 789                          * (006) nop
 790                          * (007) nop
 791                          * (008) ild     [x+2]
 792                          *
 793                          * XXX We need to check that X is not
 794                          * subsequently used, because we want to change
 795                          * what'll be in it after this sequence.
 796                          *
 797                          * We know we can eliminate the accumulator
 798                          * modifications earlier in the sequence since
 799                          * it is defined by the last stmt of this sequence
 800                          * (i.e., the last statement of the sequence loads
 801                          * a value into the accumulator, so we can eliminate
 802                          * earlier operations on the accumulator).
 803                          */
 804                         ild->s.k += s->s.k;
 805                         s->s.code = NOP;
 806                         add->s.code = NOP;
 807                         tax->s.code = NOP;
 808                         opt_state->done = 0;
 809                 }
 810         }
 811         /*
 812          * If the comparison at the end of a block is an equality
 813          * comparison against a constant, and nobody uses the value
 814          * we leave in the A register at the end of a block, and
 815          * the operation preceding the comparison is an arithmetic
 816          * operation, we can sometime optimize it away.
 817          */
 818         if (b->s.code == (BPF_JMP|BPF_JEQ|BPF_K) &&
 819             !ATOMELEM(b->out_use, A_ATOM)) {
 820                 /*
 821                  * We can optimize away certain subtractions of the
 822                  * X register.
 823                  */
 824                 if (last->s.code == (BPF_ALU|BPF_SUB|BPF_X)) {
 825                         val = b->val[X_ATOM];
 826                         if (opt_state->vmap[val].is_const) {
 827                                 /*
 828                                  * If we have a subtract to do a comparison,
 829                                  * and the X register is a known constant,
 830                                  * we can merge this value into the
 831                                  * comparison:
 832                                  *
 833                                  * sub x  ->    nop
 834                                  * jeq #y       jeq #(x+y)
 835                                  */
 836                                 b->s.k += opt_state->vmap[val].const_val;
 837                                 last->s.code = NOP;
 838                                 opt_state->done = 0;
 839                         } else if (b->s.k == 0) {
 840                                 /*
 841                                  * If the X register isn't a constant,
 842                                  * and the comparison in the test is
 843                                  * against 0, we can compare with the
 844                                  * X register, instead:
 845                                  *
 846                                  * sub x  ->    nop
 847                                  * jeq #0       jeq x
 848                                  */
 849                                 last->s.code = NOP;
 850                                 b->s.code = BPF_JMP|BPF_JEQ|BPF_X;
 851                                 opt_state->done = 0;
 852                         }
 853                 }
 854                 /*
 855                  * Likewise, a constant subtract can be simplified:
 856                  *
 857                  * sub #x ->    nop
 858                  * jeq #y ->    jeq #(x+y)
 859                  */
 860                 else if (last->s.code == (BPF_ALU|BPF_SUB|BPF_K)) {
 861                         last->s.code = NOP;
 862                         b->s.k += last->s.k;
 863                         opt_state->done = 0;
 864                 }
 865                 /*
 866                  * And, similarly, a constant AND can be simplified
 867                  * if we're testing against 0, i.e.:
 868                  *
 869                  * and #k       nop
 870                  * jeq #0  ->   jset #k
 871                  */
 872                 else if (last->s.code == (BPF_ALU|BPF_AND|BPF_K) &&
 873                     b->s.k == 0) {
 874                         b->s.k = last->s.k;
 875                         b->s.code = BPF_JMP|BPF_K|BPF_JSET;
 876                         last->s.code = NOP;
 877                         opt_state->done = 0;
 878                         opt_not(b);
 879                 }
 880         }
 881         /*
 882          * jset #0        ->   never
 883          * jset #ffffffff ->   always
 884          */
 885         if (b->s.code == (BPF_JMP|BPF_K|BPF_JSET)) {
 886                 if (b->s.k == 0)
 887                         JT(b) = JF(b);
 888                 if ((u_int)b->s.k == 0xffffffffU)
 889                         JF(b) = JT(b);
 890         }
 891         /*
 892          * If we're comparing against the index register, and the index
 893          * register is a known constant, we can just compare against that
 894          * constant.
 895          */
 896         val = b->val[X_ATOM];
 897         if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_X) {
 898                 bpf_int32 v = opt_state->vmap[val].const_val;
 899                 b->s.code &= ~BPF_X;
 900                 b->s.k = v;
 901         }
 902         /*
 903          * If the accumulator is a known constant, we can compute the
 904          * comparison result.
 905          */
 906         val = b->val[A_ATOM];
 907         if (opt_state->vmap[val].is_const && BPF_SRC(b->s.code) == BPF_K) {
 908                 bpf_int32 v = opt_state->vmap[val].const_val;
 909                 switch (BPF_OP(b->s.code)) {
 910
 911                 case BPF_JEQ:
 912                         v = v == b->s.k;
 913                         break;
 914
 915                 case BPF_JGT:
 916                         v = (unsigned)v > (unsigned)b->s.k;
 917                         break;
 918
 919                 case BPF_JGE:
 920                         v = (unsigned)v >= (unsigned)b->s.k;
 921                         break;
 922
 923                 case BPF_JSET:
 924                         v &= b->s.k;
 925                         break;
 926
 927                 default:
 928                         abort();
 929                 }
 930                 if (JF(b) != JT(b))
 931                         opt_state->done = 0;
 932                 if (v)
 933                         JF(b) = JT(b);
 934                 else
 935                         JT(b) = JF(b);
 936         }
 937 }
 938
 939 /*
 940  * Compute the symbolic value of expression of 's', and update
 941  * anything it defines in the value table 'val'.  If 'alter' is true,
 942  * do various optimizations.  This code would be cleaner if symbolic
 943  * evaluation and code transformations weren't folded together.
 944  */
 945 static void
 946 opt_stmt(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
 947     struct stmt *s, int val[], int alter)
 948 {
 949         int op;
 950         int v;
 951
 952         switch (s->code) {
 953
 954         case BPF_LD|BPF_ABS|BPF_W:
 955         case BPF_LD|BPF_ABS|BPF_H:
 956         case BPF_LD|BPF_ABS|BPF_B:
 957                 v = F(opt_state, s->code, s->k, 0L);
 958                 vstore(s, &val[A_ATOM], v, alter);
 959                 break;
 960
 961         case BPF_LD|BPF_IND|BPF_W:
 962         case BPF_LD|BPF_IND|BPF_H:
 963         case BPF_LD|BPF_IND|BPF_B:
 964                 v = val[X_ATOM];
 965                 if (alter && opt_state->vmap[v].is_const) {
 966                         s->code = BPF_LD|BPF_ABS|BPF_SIZE(s->code);
 967                         s->k += opt_state->vmap[v].const_val;
 968                         v = F(opt_state, s->code, s->k, 0L);
 969                         opt_state->done = 0;
 970                 }
 971                 else
 972                         v = F(opt_state, s->code, s->k, v);
 973                 vstore(s, &val[A_ATOM], v, alter);
 974                 break;
 975
 976         case BPF_LD|BPF_LEN:
 977                 v = F(opt_state, s->code, 0L, 0L);
 978                 vstore(s, &val[A_ATOM], v, alter);
 979                 break;
 980
 981         case BPF_LD|BPF_IMM:
 982                 v = K(s->k);
 983                 vstore(s, &val[A_ATOM], v, alter);
 984                 break;
 985
 986         case BPF_LDX|BPF_IMM:
 987                 v = K(s->k);
 988                 vstore(s, &val[X_ATOM], v, alter);
 989                 break;
 990
 991         case BPF_LDX|BPF_MSH|BPF_B:
 992                 v = F(opt_state, s->code, s->k, 0L);
 993                 vstore(s, &val[X_ATOM], v, alter);
 994                 break;
 995
 996         case BPF_ALU|BPF_NEG:
 997                 if (alter && opt_state->vmap[val[A_ATOM]].is_const) {
 998                         s->code = BPF_LD|BPF_IMM;
 999                         s->k = -opt_state->vmap[val[A_ATOM]].const_val;
1000                         val[A_ATOM] = K(s->k);
1001                 }
1002                 else
1003                         val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], 0L);
1004                 break;
1005
1006         case BPF_ALU|BPF_ADD|BPF_K:
1007         case BPF_ALU|BPF_SUB|BPF_K:
1008         case BPF_ALU|BPF_MUL|BPF_K:
1009         case BPF_ALU|BPF_DIV|BPF_K:
1010         case BPF_ALU|BPF_MOD|BPF_K:
1011         case BPF_ALU|BPF_AND|BPF_K:
1012         case BPF_ALU|BPF_OR|BPF_K:
1013         case BPF_ALU|BPF_XOR|BPF_K:
1014         case BPF_ALU|BPF_LSH|BPF_K:
1015         case BPF_ALU|BPF_RSH|BPF_K:
1016                 op = BPF_OP(s->code);
1017                 if (alter) {
1018                         if (s->k == 0) {
1019                                 /* don't optimize away "sub #0"
1020                                  * as it may be needed later to
1021                                  * fixup the generated math code */
1022                                 if (op == BPF_ADD ||
1023                                     op == BPF_LSH || op == BPF_RSH ||
1024                                     op == BPF_OR || op == BPF_XOR) {
1025                                         s->code = NOP;
1026                                         break;
1027                                 }
1028                                 if (op == BPF_MUL || op == BPF_AND) {
1029                                         s->code = BPF_LD|BPF_IMM;
1030                                         val[A_ATOM] = K(s->k);
1031                                         break;
1032                                 }
1033                         }
1034                         if (opt_state->vmap[val[A_ATOM]].is_const) {
1035                                 fold_op(cstate, ic, opt_state, s, val[A_ATOM], K(s->k));
1036                                 val[A_ATOM] = K(s->k);
1037                                 break;
1038                         }
1039                 }
1040                 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], K(s->k));
1041                 break;
1042
1043         case BPF_ALU|BPF_ADD|BPF_X:
1044         case BPF_ALU|BPF_SUB|BPF_X:
1045         case BPF_ALU|BPF_MUL|BPF_X:
1046         case BPF_ALU|BPF_DIV|BPF_X:
1047         case BPF_ALU|BPF_MOD|BPF_X:
1048         case BPF_ALU|BPF_AND|BPF_X:
1049         case BPF_ALU|BPF_OR|BPF_X:
1050         case BPF_ALU|BPF_XOR|BPF_X:
1051         case BPF_ALU|BPF_LSH|BPF_X:
1052         case BPF_ALU|BPF_RSH|BPF_X:
1053                 op = BPF_OP(s->code);
1054                 if (alter && opt_state->vmap[val[X_ATOM]].is_const) {
1055                         if (opt_state->vmap[val[A_ATOM]].is_const) {
1056                                 fold_op(cstate, ic, opt_state, s, val[A_ATOM], val[X_ATOM]);
1057                                 val[A_ATOM] = K(s->k);
1058                         }
1059                         else {
1060                                 s->code = BPF_ALU|BPF_K|op;
1061                                 s->k = opt_state->vmap[val[X_ATOM]].const_val;
1062                                 opt_state->done = 0;
1063                                 val[A_ATOM] =
1064                                         F(opt_state, s->code, val[A_ATOM], K(s->k));
1065                         }
1066                         break;
1067                 }
1068                 /*
1069                  * Check if we're doing something to an accumulator
1070                  * that is 0, and simplify.  This may not seem like
1071                  * much of a simplification but it could open up further
1072                  * optimizations.
1073                  * XXX We could also check for mul by 1, etc.
1074                  */
1075                 if (alter && opt_state->vmap[val[A_ATOM]].is_const
1076                     && opt_state->vmap[val[A_ATOM]].const_val == 0) {
1077                         if (op == BPF_ADD || op == BPF_OR || op == BPF_XOR) {
1078                                 s->code = BPF_MISC|BPF_TXA;
1079                                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1080                                 break;
1081                         }
1082                         else if (op == BPF_MUL || op == BPF_DIV || op == BPF_MOD ||
1083                                  op == BPF_AND || op == BPF_LSH || op == BPF_RSH) {
1084                                 s->code = BPF_LD|BPF_IMM;
1085                                 s->k = 0;
1086                                 vstore(s, &val[A_ATOM], K(s->k), alter);
1087                                 break;
1088                         }
1089                         else if (op == BPF_NEG) {
1090                                 s->code = NOP;
1091                                 break;
1092                         }
1093                 }
1094                 val[A_ATOM] = F(opt_state, s->code, val[A_ATOM], val[X_ATOM]);
1095                 break;
1096
1097         case BPF_MISC|BPF_TXA:
1098                 vstore(s, &val[A_ATOM], val[X_ATOM], alter);
1099                 break;
1100
1101         case BPF_LD|BPF_MEM:
1102                 v = val[s->k];
1103                 if (alter && opt_state->vmap[v].is_const) {
1104                         s->code = BPF_LD|BPF_IMM;
1105                         s->k = opt_state->vmap[v].const_val;
1106                         opt_state->done = 0;
1107                 }
1108                 vstore(s, &val[A_ATOM], v, alter);
1109                 break;
1110
1111         case BPF_MISC|BPF_TAX:
1112                 vstore(s, &val[X_ATOM], val[A_ATOM], alter);
1113                 break;
1114
1115         case BPF_LDX|BPF_MEM:
1116                 v = val[s->k];
1117                 if (alter && opt_state->vmap[v].is_const) {
1118                         s->code = BPF_LDX|BPF_IMM;
1119                         s->k = opt_state->vmap[v].const_val;
1120                         opt_state->done = 0;
1121                 }
1122                 vstore(s, &val[X_ATOM], v, alter);
1123                 break;
1124
1125         case BPF_ST:
1126                 vstore(s, &val[s->k], val[A_ATOM], alter);
1127                 break;
1128
1129         case BPF_STX:
1130                 vstore(s, &val[s->k], val[X_ATOM], alter);
1131                 break;
1132         }
1133 }
1134
1135 static void
1136 deadstmt(opt_state_t *opt_state, register struct stmt *s, register struct stmt *last[])
1137 {
1138         register int atom;
1139
1140         atom = atomuse(s);
1141         if (atom >= 0) {
1142                 if (atom == AX_ATOM) {
1143                         last[X_ATOM] = 0;
1144                         last[A_ATOM] = 0;
1145                 }
1146                 else
1147                         last[atom] = 0;
1148         }
1149         atom = atomdef(s);
1150         if (atom >= 0) {
1151                 if (last[atom]) {
1152                         opt_state->done = 0;
1153                         last[atom]->code = NOP;
1154                 }
1155                 last[atom] = s;
1156         }
1157 }
1158
1159 static void
1160 opt_deadstores(opt_state_t *opt_state, register struct block *b)
1161 {
1162         register struct slist *s;
1163         register int atom;
1164         struct stmt *last[N_ATOMS];
1165
1166         memset((char *)last, 0, sizeof last);
1167
1168         for (s = b->stmts; s != 0; s = s->next)
1169                 deadstmt(opt_state, &s->s, last);
1170         deadstmt(opt_state, &b->s, last);
1171
1172         for (atom = 0; atom < N_ATOMS; ++atom)
1173                 if (last[atom] && !ATOMELEM(b->out_use, atom)) {
1174                         last[atom]->code = NOP;
1175                         opt_state->done = 0;
1176                 }
1177 }
1178
1179 static void
1180 opt_blk(compiler_state_t *cstate, struct icode *ic, opt_state_t *opt_state,
1181     struct block *b, int do_stmts)
1182 {
1183         struct slist *s;
1184         struct edge *p;
1185         int i;
1186         bpf_int32 aval, xval;
1187
1188 #if 0
1189         for (s = b->stmts; s && s->next; s = s->next)
1190                 if (BPF_CLASS(s->s.code) == BPF_JMP) {
1191                         do_stmts = 0;
1192                         break;
1193                 }
1194 #endif
1195
1196         /*
1197          * Initialize the atom values.
1198          */
1199         p = b->in_edges;
1200         if (p == 0) {
1201                 /*
1202                  * We have no predecessors, so everything is undefined
1203                  * upon entry to this block.
1204                  */
1205                 memset((char *)b->val, 0, sizeof(b->val));
1206         } else {
1207                 /*
1208                  * Inherit values from our predecessors.
1209                  *
1210                  * First, get the values from the predecessor along the
1211                  * first edge leading to this node.
1212                  */
1213                 memcpy((char *)b->val, (char *)p->pred->val, sizeof(b->val));
1214                 /*
1215                  * Now look at all the other nodes leading to this node.
1216                  * If, for the predecessor along that edge, a register
1217                  * has a different value from the one we have (i.e.,
1218                  * control paths are merging, and the merging paths
1219                  * assign different values to that register), give the
1220                  * register the undefined value of 0.
1221                  */
1222                 while ((p = p->next) != NULL) {
1223                         for (i = 0; i < N_ATOMS; ++i)
1224                                 if (b->val[i] != p->pred->val[i])
1225                                         b->val[i] = 0;
1226                 }
1227         }
1228         aval = b->val[A_ATOM];
1229         xval = b->val[X_ATOM];
1230         for (s = b->stmts; s; s = s->next)
1231                 opt_stmt(cstate, ic, opt_state, &s->s, b->val, do_stmts);
1232
1233         /*
1234          * This is a special case: if we don't use anything from this
1235          * block, and we load the accumulator or index register with a
1236          * value that is already there, or if this block is a return,
1237          * eliminate all the statements.
1238          *
1239          * XXX - what if it does a store?
1240          *
1241          * XXX - why does it matter whether we use anything from this
1242          * block?  If the accumulator or index register doesn't change
1243          * its value, isn't that OK even if we use that value?
1244          *
1245          * XXX - if we load the accumulator with a different value,
1246          * and the block ends with a conditional branch, we obviously
1247          * can't eliminate it, as the branch depends on that value.
1248          * For the index register, the conditional branch only depends
1249          * on the index register value if the test is against the index
1250          * register value rather than a constant; if nothing uses the
1251          * value we put into the index register, and we're not testing
1252          * against the index register's value, and there aren't any
1253          * other problems that would keep us from eliminating this
1254          * block, can we eliminate it?
1255          */
1256         if (do_stmts &&
1257             ((b->out_use == 0 && aval != 0 && b->val[A_ATOM] == aval &&
1258               xval != 0 && b->val[X_ATOM] == xval) ||
1259              BPF_CLASS(b->s.code) == BPF_RET)) {
1260                 if (b->stmts != 0) {
1261                         b->stmts = 0;
1262                         opt_state->done = 0;
1263                 }
1264         } else {
1265                 opt_peep(opt_state, b);
1266                 opt_deadstores(opt_state, b);
1267         }
1268         /*
1269          * Set up values for branch optimizer.
1270          */
1271         if (BPF_SRC(b->s.code) == BPF_K)
1272                 b->oval = K(b->s.k);
1273         else
1274                 b->oval = b->val[X_ATOM];
1275         b->et.code = b->s.code;
1276         b->ef.code = -b->s.code;
1277 }
1278
1279 /*
1280  * Return true if any register that is used on exit from 'succ', has
1281  * an exit value that is different from the corresponding exit value
1282  * from 'b'.
1283  */
1284 static int
1285 use_conflict(struct block *b, struct block *succ)
1286 {
1287         int atom;
1288         atomset use = succ->out_use;
1289
1290         if (use == 0)
1291                 return 0;
1292
1293         for (atom = 0; atom < N_ATOMS; ++atom)
1294                 if (ATOMELEM(use, atom))
1295                         if (b->val[atom] != succ->val[atom])
1296                                 return 1;
1297         return 0;
1298 }
1299
1300 static struct block *
1301 fold_edge(struct block *child, struct edge *ep)
1302 {
1303         int sense;
1304         int aval0, aval1, oval0, oval1;
1305         int code = ep->code;
1306
1307         if (code < 0) {
1308                 code = -code;
1309                 sense = 0;
1310         } else
1311                 sense = 1;
1312
1313         if (child->s.code != code)
1314                 return 0;
1315
1316         aval0 = child->val[A_ATOM];
1317         oval0 = child->oval;
1318         aval1 = ep->pred->val[A_ATOM];
1319         oval1 = ep->pred->oval;
1320
1321         if (aval0 != aval1)
1322                 return 0;
1323
1324         if (oval0 == oval1)
1325                 /*
1326                  * The operands of the branch instructions are
1327                  * identical, so the result is true if a true
1328                  * branch was taken to get here, otherwise false.
1329                  */
1330                 return sense ? JT(child) : JF(child);
1331
1332         if (sense && code == (BPF_JMP|BPF_JEQ|BPF_K))
1333                 /*
1334                  * At this point, we only know the comparison if we
1335                  * came down the true branch, and it was an equality
1336                  * comparison with a constant.
1337                  *
1338                  * I.e., if we came down the true branch, and the branch
1339                  * was an equality comparison with a constant, we know the
1340                  * accumulator contains that constant.  If we came down
1341                  * the false branch, or the comparison wasn't with a
1342                  * constant, we don't know what was in the accumulator.
1343                  *
1344                  * We rely on the fact that distinct constants have distinct
1345                  * value numbers.
1346                  */
1347                 return JF(child);
1348
1349         return 0;
1350 }
1351
1352 static void
1353 opt_j(opt_state_t *opt_state, struct edge *ep)
1354 {
1355         register int i, k;
1356         register struct block *target;
1357
1358         if (JT(ep->succ) == 0)
1359                 return;
1360
1361         if (JT(ep->succ) == JF(ep->succ)) {
1362                 /*
1363                  * Common branch targets can be eliminated, provided
1364                  * there is no data dependency.
1365                  */
1366                 if (!use_conflict(ep->pred, ep->succ->et.succ)) {
1367                         opt_state->done = 0;
1368                         ep->succ = JT(ep->succ);
1369                 }
1370         }
1371         /*
1372          * For each edge dominator that matches the successor of this
1373          * edge, promote the edge successor to the its grandchild.
1374          *
1375          * XXX We violate the set abstraction here in favor a reasonably
1376          * efficient loop.
1377          */
1378  top:
1379         for (i = 0; i < opt_state->edgewords; ++i) {
1380                 register bpf_u_int32 x = ep->edom[i];
1381
1382                 while (x != 0) {
1383                         k = ffs(x) - 1;
1384                         x &=~ (1 << k);
1385                         k += i * BITS_PER_WORD;
1386
1387                         target = fold_edge(ep->succ, opt_state->edges[k]);
1388                         /*
1389                          * Check that there is no data dependency between
1390                          * nodes that will be violated if we move the edge.
1391                          */
1392                         if (target != 0 && !use_conflict(ep->pred, target)) {
1393                                 opt_state->done = 0;
1394                                 ep->succ = target;
1395                                 if (JT(target) != 0)
1396                                         /*
1397                                          * Start over unless we hit a leaf.
1398                                          */
1399                                         goto top;
1400                                 return;
1401                         }
1402                 }
1403         }
1404 }
1405
1406
1407 static void
1408 or_pullup(opt_state_t *opt_state, struct block *b)
1409 {
1410         int val, at_top;
1411         struct block *pull;
1412         struct block **diffp, **samep;
1413         struct edge *ep;
1414
1415         ep = b->in_edges;
1416         if (ep == 0)
1417                 return;
1418
1419         /*
1420          * Make sure each predecessor loads the same value.
1421          * XXX why?
1422          */
1423         val = ep->pred->val[A_ATOM];
1424         for (ep = ep->next; ep != 0; ep = ep->next)
1425                 if (val != ep->pred->val[A_ATOM])
1426                         return;
1427
1428         if (JT(b->in_edges->pred) == b)
1429                 diffp = &JT(b->in_edges->pred);
1430         else
1431                 diffp = &JF(b->in_edges->pred);
1432
1433         at_top = 1;
1434         while (1) {
1435                 if (*diffp == 0)
1436                         return;
1437
1438                 if (JT(*diffp) != JT(b))
1439                         return;
1440
1441                 if (!SET_MEMBER((*diffp)->dom, b->id))
1442                         return;
1443
1444                 if ((*diffp)->val[A_ATOM] != val)
1445                         break;
1446
1447                 diffp = &JF(*diffp);
1448                 at_top = 0;
1449         }
1450         samep = &JF(*diffp);
1451         while (1) {
1452                 if (*samep == 0)
1453                         return;
1454
1455                 if (JT(*samep) != JT(b))
1456                         return;
1457
1458                 if (!SET_MEMBER((*samep)->dom, b->id))
1459                         return;
1460
1461                 if ((*samep)->val[A_ATOM] == val)
1462                         break;
1463
1464                 /* XXX Need to check that there are no data dependencies
1465                    between dp0 and dp1.  Currently, the code generator
1466                    will not produce such dependencies. */
1467                 samep = &JF(*samep);
1468         }
1469 #ifdef notdef
1470         /* XXX This doesn't cover everything. */
1471         for (i = 0; i < N_ATOMS; ++i)
1472                 if ((*samep)->val[i] != pred->val[i])
1473                         return;
1474 #endif
1475         /* Pull up the node. */
1476         pull = *samep;
1477         *samep = JF(pull);
1478         JF(pull) = *diffp;
1479
1480         /*
1481          * At the top of the chain, each predecessor needs to point at the
1482          * pulled up node.  Inside the chain, there is only one predecessor
1483          * to worry about.
1484          */
1485         if (at_top) {
1486                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1487                         if (JT(ep->pred) == b)
1488                                 JT(ep->pred) = pull;
1489                         else
1490                                 JF(ep->pred) = pull;
1491                 }
1492         }
1493         else
1494                 *diffp = pull;
1495
1496         opt_state->done = 0;
1497 }
1498
1499 static void
1500 and_pullup(opt_state_t *opt_state, struct block *b)
1501 {
1502         int val, at_top;
1503         struct block *pull;
1504         struct block **diffp, **samep;
1505         struct edge *ep;
1506
1507         ep = b->in_edges;
1508         if (ep == 0)
1509                 return;
1510
1511         /*
1512          * Make sure each predecessor loads the same value.
1513          */
1514         val = ep->pred->val[A_ATOM];
1515         for (ep = ep->next; ep != 0; ep = ep->next)
1516                 if (val != ep->pred->val[A_ATOM])
1517                         return;
1518
1519         if (JT(b->in_edges->pred) == b)
1520                 diffp = &JT(b->in_edges->pred);
1521         else
1522                 diffp = &JF(b->in_edges->pred);
1523
1524         at_top = 1;
1525         while (1) {
1526                 if (*diffp == 0)
1527                         return;
1528
1529                 if (JF(*diffp) != JF(b))
1530                         return;
1531
1532                 if (!SET_MEMBER((*diffp)->dom, b->id))
1533                         return;
1534
1535                 if ((*diffp)->val[A_ATOM] != val)
1536                         break;
1537
1538                 diffp = &JT(*diffp);
1539                 at_top = 0;
1540         }
1541         samep = &JT(*diffp);
1542         while (1) {
1543                 if (*samep == 0)
1544                         return;
1545
1546                 if (JF(*samep) != JF(b))
1547                         return;
1548
1549                 if (!SET_MEMBER((*samep)->dom, b->id))
1550                         return;
1551
1552                 if ((*samep)->val[A_ATOM] == val)
1553                         break;
1554
1555                 /* XXX Need to check that there are no data dependencies
1556                    between diffp and samep.  Currently, the code generator
1557                    will not produce such dependencies. */
1558                 samep = &JT(*samep);
1559         }
1560 #ifdef notdef
1561         /* XXX This doesn't cover everything. */
1562         for (i = 0; i < N_ATOMS; ++i)
1563                 if ((*samep)->val[i] != pred->val[i])
1564                         return;
1565 #endif
1566         /* Pull up the node. */
1567         pull = *samep;
1568         *samep = JT(pull);
1569         JT(pull) = *diffp;
1570
1571         /*
1572          * At the top of the chain, each predecessor needs to point at the
1573          * pulled up node.  Inside the chain, there is only one predecessor
1574          * to worry about.
1575          */
1576         if (at_top) {
1577                 for (ep = b->in_edges; ep != 0; ep = ep->next) {
1578                         if (JT(ep->pred) == b)
1579                                 JT(ep->pred) = pull;
1580                         else
1581                                 JF(ep->pred) = pull;
1582                 }
1583         }
1584         else
1585                 *diffp = pull;
1586
1587         opt_state->done = 0;
1588 }
1589
1590 static void
1591 opt_blks(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1592     int do_stmts)
1593 {
1594         int i, maxlevel;
1595         struct block *p;
1596
1597         init_val(opt_state);
1598         maxlevel = ic->root->level;
1599
1600         find_inedges(opt_state, ic->root);
1601         for (i = maxlevel; i >= 0; --i)
1602                 for (p = opt_state->levels[i]; p; p = p->link)
1603                         opt_blk(cstate, ic, opt_state, p, do_stmts);
1604
1605         if (do_stmts)
1606                 /*
1607                  * No point trying to move branches; it can't possibly
1608                  * make a difference at this point.
1609                  */
1610                 return;
1611
1612         for (i = 1; i <= maxlevel; ++i) {
1613                 for (p = opt_state->levels[i]; p; p = p->link) {
1614                         opt_j(opt_state, &p->et);
1615                         opt_j(opt_state, &p->ef);
1616                 }
1617         }
1618
1619         find_inedges(opt_state, ic->root);
1620         for (i = 1; i <= maxlevel; ++i) {
1621                 for (p = opt_state->levels[i]; p; p = p->link) {
1622                         or_pullup(opt_state, p);
1623                         and_pullup(opt_state, p);
1624                 }
1625         }
1626 }
1627
1628 static inline void
1629 link_inedge(struct edge *parent, struct block *child)
1630 {
1631         parent->next = child->in_edges;
1632         child->in_edges = parent;
1633 }
1634
1635 static void
1636 find_inedges(opt_state_t *opt_state, struct block *root)
1637 {
1638         int i;
1639         struct block *b;
1640
1641         for (i = 0; i < opt_state->n_blocks; ++i)
1642                 opt_state->blocks[i]->in_edges = 0;
1643
1644         /*
1645          * Traverse the graph, adding each edge to the predecessor
1646          * list of its successors.  Skip the leaves (i.e. level 0).
1647          */
1648         for (i = root->level; i > 0; --i) {
1649                 for (b = opt_state->levels[i]; b != 0; b = b->link) {
1650                         link_inedge(&b->et, JT(b));
1651                         link_inedge(&b->ef, JF(b));
1652                 }
1653         }
1654 }
1655
1656 static void
1657 opt_root(struct block **b)
1658 {
1659         struct slist *tmp, *s;
1660
1661         s = (*b)->stmts;
1662         (*b)->stmts = 0;
1663         while (BPF_CLASS((*b)->s.code) == BPF_JMP && JT(*b) == JF(*b))
1664                 *b = JT(*b);
1665
1666         tmp = (*b)->stmts;
1667         if (tmp != 0)
1668                 sappend(s, tmp);
1669         (*b)->stmts = s;
1670
1671         /*
1672          * If the root node is a return, then there is no
1673          * point executing any statements (since the bpf machine
1674          * has no side effects).
1675          */
1676         if (BPF_CLASS((*b)->s.code) == BPF_RET)
1677                 (*b)->stmts = 0;
1678 }
1679
1680 static void
1681 opt_loop(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic,
1682     int do_stmts)
1683 {
1684
1685 #ifdef BDEBUG
1686         if (pcap_optimizer_debug > 1) {
1687                 printf("opt_loop(root, %d) begin\n", do_stmts);
1688                 opt_dump(cstate, ic);
1689         }
1690 #endif
1691         do {
1692                 opt_state->done = 1;
1693                 find_levels(opt_state, ic);
1694                 find_dom(opt_state, ic->root);
1695                 find_closure(opt_state, ic->root);
1696                 find_ud(opt_state, ic->root);
1697                 find_edom(opt_state, ic->root);
1698                 opt_blks(cstate, opt_state, ic, do_stmts);
1699 #ifdef BDEBUG
1700                 if (pcap_optimizer_debug > 1) {
1701                         printf("opt_loop(root, %d) bottom, done=%d\n", do_stmts, opt_state->done);
1702                         opt_dump(cstate, ic);
1703                 }
1704 #endif
1705         } while (!opt_state->done);
1706 }
1707
1708 /*
1709  * Optimize the filter code in its dag representation.
1710  */
1711 void
1712 bpf_optimize(compiler_state_t *cstate, struct icode *ic)
1713 {
1714         opt_state_t opt_state;
1715
1716         opt_init(cstate, &opt_state, ic);
1717         opt_loop(cstate, &opt_state, ic, 0);
1718         opt_loop(cstate, &opt_state, ic, 1);
1719         intern_blocks(&opt_state, ic);
1720 #ifdef BDEBUG
1721         if (pcap_optimizer_debug > 1) {
1722                 printf("after intern_blocks()\n");
1723                 opt_dump(cstate, ic);
1724         }
1725 #endif
1726         opt_root(&ic->root);
1727 #ifdef BDEBUG
1728         if (pcap_optimizer_debug > 1) {
1729                 printf("after opt_root()\n");
1730                 opt_dump(cstate, ic);
1731         }
1732 #endif
1733         opt_cleanup(&opt_state);
1734 }
1735
1736 static void
1737 make_marks(struct icode *ic, struct block *p)
1738 {
1739         if (!isMarked(ic, p)) {
1740                 Mark(ic, p);
1741                 if (BPF_CLASS(p->s.code) != BPF_RET) {
1742                         make_marks(ic, JT(p));
1743                         make_marks(ic, JF(p));
1744                 }
1745         }
1746 }
1747
1748 /*
1749  * Mark code array such that isMarked(ic->cur_mark, i) is true
1750  * only for nodes that are alive.
1751  */
1752 static void
1753 mark_code(struct icode *ic)
1754 {
1755         ic->cur_mark += 1;
1756         make_marks(ic, ic->root);
1757 }
1758
1759 /*
1760  * True iff the two stmt lists load the same value from the packet into
1761  * the accumulator.
1762  */
1763 static int
1764 eq_slist(struct slist *x, struct slist *y)
1765 {
1766         while (1) {
1767                 while (x && x->s.code == NOP)
1768                         x = x->next;
1769                 while (y && y->s.code == NOP)
1770                         y = y->next;
1771                 if (x == 0)
1772                         return y == 0;
1773                 if (y == 0)
1774                         return x == 0;
1775                 if (x->s.code != y->s.code || x->s.k != y->s.k)
1776                         return 0;
1777                 x = x->next;
1778                 y = y->next;
1779         }
1780 }
1781
1782 static inline int
1783 eq_blk(struct block *b0, struct block *b1)
1784 {
1785         if (b0->s.code == b1->s.code &&
1786             b0->s.k == b1->s.k &&
1787             b0->et.succ == b1->et.succ &&
1788             b0->ef.succ == b1->ef.succ)
1789                 return eq_slist(b0->stmts, b1->stmts);
1790         return 0;
1791 }
1792
1793 static void
1794 intern_blocks(opt_state_t *opt_state, struct icode *ic)
1795 {
1796         struct block *p;
1797         int i, j;
1798         int done1; /* don't shadow global */
1799  top:
1800         done1 = 1;
1801         for (i = 0; i < opt_state->n_blocks; ++i)
1802                 opt_state->blocks[i]->link = 0;
1803
1804         mark_code(ic);
1805
1806         for (i = opt_state->n_blocks - 1; --i >= 0; ) {
1807                 if (!isMarked(ic, opt_state->blocks[i]))
1808                         continue;
1809                 for (j = i + 1; j < opt_state->n_blocks; ++j) {
1810                         if (!isMarked(ic, opt_state->blocks[j]))
1811                                 continue;
1812                         if (eq_blk(opt_state->blocks[i], opt_state->blocks[j])) {
1813                                 opt_state->blocks[i]->link = opt_state->blocks[j]->link ?
1814                                         opt_state->blocks[j]->link : opt_state->blocks[j];
1815                                 break;
1816                         }
1817                 }
1818         }
1819         for (i = 0; i < opt_state->n_blocks; ++i) {
1820                 p = opt_state->blocks[i];
1821                 if (JT(p) == 0)
1822                         continue;
1823                 if (JT(p)->link) {
1824                         done1 = 0;
1825                         JT(p) = JT(p)->link;
1826                 }
1827                 if (JF(p)->link) {
1828                         done1 = 0;
1829                         JF(p) = JF(p)->link;
1830                 }
1831         }
1832         if (!done1)
1833                 goto top;
1834 }
1835
1836 static void
1837 opt_cleanup(opt_state_t *opt_state)
1838 {
1839         free((void *)opt_state->vnode_base);
1840         free((void *)opt_state->vmap);
1841         free((void *)opt_state->edges);
1842         free((void *)opt_state->space);
1843         free((void *)opt_state->levels);
1844         free((void *)opt_state->blocks);
1845 }
1846
1847 /*
1848  * Return the number of stmts in 's'.
1849  */
1850 static u_int
1851 slength(struct slist *s)
1852 {
1853         u_int n = 0;
1854
1855         for (; s; s = s->next)
1856                 if (s->s.code != NOP)
1857                         ++n;
1858         return n;
1859 }
1860
1861 /*
1862  * Return the number of nodes reachable by 'p'.
1863  * All nodes should be initially unmarked.
1864  */
1865 static int
1866 count_blocks(struct icode *ic, struct block *p)
1867 {
1868         if (p == 0 || isMarked(ic, p))
1869                 return 0;
1870         Mark(ic, p);
1871         return count_blocks(ic, JT(p)) + count_blocks(ic, JF(p)) + 1;
1872 }
1873
1874 /*
1875  * Do a depth first search on the flow graph, numbering the
1876  * the basic blocks, and entering them into the 'blocks' array.`
1877  */
1878 static void
1879 number_blks_r(opt_state_t *opt_state, struct icode *ic, struct block *p)
1880 {
1881         int n;
1882
1883         if (p == 0 || isMarked(ic, p))
1884                 return;
1885
1886         Mark(ic, p);
1887         n = opt_state->n_blocks++;
1888         p->id = n;
1889         opt_state->blocks[n] = p;
1890
1891         number_blks_r(opt_state, ic, JT(p));
1892         number_blks_r(opt_state, ic, JF(p));
1893 }
1894
1895 /*
1896  * Return the number of stmts in the flowgraph reachable by 'p'.
1897  * The nodes should be unmarked before calling.
1898  *
1899  * Note that "stmts" means "instructions", and that this includes
1900  *
1901  *      side-effect statements in 'p' (slength(p->stmts));
1902  *
1903  *      statements in the true branch from 'p' (count_stmts(JT(p)));
1904  *
1905  *      statements in the false branch from 'p' (count_stmts(JF(p)));
1906  *
1907  *      the conditional jump itself (1);
1908  *
1909  *      an extra long jump if the true branch requires it (p->longjt);
1910  *
1911  *      an extra long jump if the false branch requires it (p->longjf).
1912  */
1913 static u_int
1914 count_stmts(struct icode *ic, struct block *p)
1915 {
1916         u_int n;
1917
1918         if (p == 0 || isMarked(ic, p))
1919                 return 0;
1920         Mark(ic, p);
1921         n = count_stmts(ic, JT(p)) + count_stmts(ic, JF(p));
1922         return slength(p->stmts) + n + 1 + p->longjt + p->longjf;
1923 }
1924
1925 /*
1926  * Allocate memory.  All allocation is done before optimization
1927  * is begun.  A linear bound on the size of all data structures is computed
1928  * from the total number of blocks and/or statements.
1929  */
1930 static void
1931 opt_init(compiler_state_t *cstate, opt_state_t *opt_state, struct icode *ic)
1932 {
1933         bpf_u_int32 *p;
1934         int i, n, max_stmts;
1935
1936         /*
1937          * First, count the blocks, so we can malloc an array to map
1938          * block number to block.  Then, put the blocks into the array.
1939          */
1940         unMarkAll(ic);
1941         n = count_blocks(ic, ic->root);
1942         opt_state->blocks = (struct block **)calloc(n, sizeof(*opt_state->blocks));
1943         if (opt_state->blocks == NULL)
1944                 bpf_error(cstate, "malloc");
1945         unMarkAll(ic);
1946         opt_state->n_blocks = 0;
1947         number_blks_r(opt_state, ic, ic->root);
1948
1949         opt_state->n_edges = 2 * opt_state->n_blocks;
1950         opt_state->edges = (struct edge **)calloc(opt_state->n_edges, sizeof(*opt_state->edges));
1951         if (opt_state->edges == NULL)
1952                 bpf_error(cstate, "malloc");
1953
1954         /*
1955          * The number of levels is bounded by the number of nodes.
1956          */
1957         opt_state->levels = (struct block **)calloc(opt_state->n_blocks, sizeof(*opt_state->levels));
1958         if (opt_state->levels == NULL)
1959                 bpf_error(cstate, "malloc");
1960
1961         opt_state->edgewords = opt_state->n_edges / (8 * sizeof(bpf_u_int32)) + 1;
1962         opt_state->nodewords = opt_state->n_blocks / (8 * sizeof(bpf_u_int32)) + 1;
1963
1964         /* XXX */
1965         opt_state->space = (bpf_u_int32 *)malloc(2 * opt_state->n_blocks * opt_state->nodewords * sizeof(*opt_state->space)
1966                                  + opt_state->n_edges * opt_state->edgewords * sizeof(*opt_state->space));
1967         if (opt_state->space == NULL)
1968                 bpf_error(cstate, "malloc");
1969         p = opt_state->space;
1970         opt_state->all_dom_sets = p;
1971         for (i = 0; i < n; ++i) {
1972                 opt_state->blocks[i]->dom = p;
1973                 p += opt_state->nodewords;
1974         }
1975         opt_state->all_closure_sets = p;
1976         for (i = 0; i < n; ++i) {
1977                 opt_state->blocks[i]->closure = p;
1978                 p += opt_state->nodewords;
1979         }
1980         opt_state->all_edge_sets = p;
1981         for (i = 0; i < n; ++i) {
1982                 register struct block *b = opt_state->blocks[i];
1983
1984                 b->et.edom = p;
1985                 p += opt_state->edgewords;
1986                 b->ef.edom = p;
1987                 p += opt_state->edgewords;
1988                 b->et.id = i;
1989                 opt_state->edges[i] = &b->et;
1990                 b->ef.id = opt_state->n_blocks + i;
1991                 opt_state->edges[opt_state->n_blocks + i] = &b->ef;
1992                 b->et.pred = b;
1993                 b->ef.pred = b;
1994         }
1995         max_stmts = 0;
1996         for (i = 0; i < n; ++i)
1997                 max_stmts += slength(opt_state->blocks[i]->stmts) + 1;
1998         /*
1999          * We allocate at most 3 value numbers per statement,
2000          * so this is an upper bound on the number of valnodes
2001          * we'll need.
2002          */
2003         opt_state->maxval = 3 * max_stmts;
2004         opt_state->vmap = (struct vmapinfo *)calloc(opt_state->maxval, sizeof(*opt_state->vmap));
2005         opt_state->vnode_base = (struct valnode *)calloc(opt_state->maxval, sizeof(*opt_state->vnode_base));
2006         if (opt_state->vmap == NULL || opt_state->vnode_base == NULL)
2007                 bpf_error(cstate, "malloc");
2008 }
2009
2010 /*
2011  * This is only used when supporting optimizer debugging.  It is
2012  * global state, so do *not* do more than one compile in parallel
2013  * and expect it to provide meaningful information.
2014  */
2015 #ifdef BDEBUG
2016 int bids[1000];
2017 #endif
2018
2019 /*
2020  * Returns true if successful.  Returns false if a branch has
2021  * an offset that is too large.  If so, we have marked that
2022  * branch so that on a subsequent iteration, it will be treated
2023  * properly.
2024  */
2025 static int
2026 convert_code_r(compiler_state_t *cstate, conv_state_t *conv_state,
2027     struct icode *ic, struct block *p)
2028 {
2029         struct bpf_insn *dst;
2030         struct slist *src;
2031         u_int slen;
2032         u_int off;
2033         int extrajmps;          /* number of extra jumps inserted */
2034         struct slist **offset = NULL;
2035
2036         if (p == 0 || isMarked(ic, p))
2037                 return (1);
2038         Mark(ic, p);
2039
2040         if (convert_code_r(cstate, conv_state, ic, JF(p)) == 0)
2041                 return (0);
2042         if (convert_code_r(cstate, conv_state, ic, JT(p)) == 0)
2043                 return (0);
2044
2045         slen = slength(p->stmts);
2046         dst = conv_state->ftail -= (slen + 1 + p->longjt + p->longjf);
2047                 /* inflate length by any extra jumps */
2048
2049         p->offset = (int)(dst - conv_state->fstart);
2050
2051         /* generate offset[] for convenience  */
2052         if (slen) {
2053                 offset = (struct slist **)calloc(slen, sizeof(struct slist *));
2054                 if (!offset) {
2055                         bpf_error(cstate, "not enough core");
2056                         /*NOTREACHED*/
2057                 }
2058         }
2059         src = p->stmts;
2060         for (off = 0; off < slen && src; off++) {
2061 #if 0
2062                 printf("off=%d src=%x\n", off, src);
2063 #endif
2064                 offset[off] = src;
2065                 src = src->next;
2066         }
2067
2068         off = 0;
2069         for (src = p->stmts; src; src = src->next) {
2070                 if (src->s.code == NOP)
2071                         continue;
2072                 dst->code = (u_short)src->s.code;
2073                 dst->k = src->s.k;
2074
2075                 /* fill block-local relative jump */
2076                 if (BPF_CLASS(src->s.code) != BPF_JMP || src->s.code == (BPF_JMP|BPF_JA)) {
2077 #if 0
2078                         if (src->s.jt || src->s.jf) {
2079                                 bpf_error(cstate, "illegal jmp destination");
2080                                 /*NOTREACHED*/
2081                         }
2082 #endif
2083                         goto filled;
2084                 }
2085                 if (off == slen - 2)    /*???*/
2086                         goto filled;
2087
2088             {
2089                 u_int i;
2090                 int jt, jf;
2091                 const char *ljerr = "%s for block-local relative jump: off=%d";
2092
2093 #if 0
2094                 printf("code=%x off=%d %x %x\n", src->s.code,
2095                         off, src->s.jt, src->s.jf);
2096 #endif
2097
2098                 if (!src->s.jt || !src->s.jf) {
2099                         bpf_error(cstate, ljerr, "no jmp destination", off);
2100                         /*NOTREACHED*/
2101                 }
2102
2103                 jt = jf = 0;
2104                 for (i = 0; i < slen; i++) {
2105                         if (offset[i] == src->s.jt) {
2106                                 if (jt) {
2107                                         bpf_error(cstate, ljerr, "multiple matches", off);
2108                                         /*NOTREACHED*/
2109                                 }
2110
2111                                 dst->jt = i - off - 1;
2112                                 jt++;
2113                         }
2114                         if (offset[i] == src->s.jf) {
2115                                 if (jf) {
2116                                         bpf_error(cstate, ljerr, "multiple matches", off);
2117                                         /*NOTREACHED*/
2118                                 }
2119                                 dst->jf = i - off - 1;
2120                                 jf++;
2121                         }
2122                 }
2123                 if (!jt || !jf) {
2124                         bpf_error(cstate, ljerr, "no destination found", off);
2125                         /*NOTREACHED*/
2126                 }
2127             }
2128 filled:
2129                 ++dst;
2130                 ++off;
2131         }
2132         if (offset)
2133                 free(offset);
2134
2135 #ifdef BDEBUG
2136         bids[dst - conv_state->fstart] = p->id + 1;
2137 #endif
2138         dst->code = (u_short)p->s.code;
2139         dst->k = p->s.k;
2140         if (JT(p)) {
2141                 extrajmps = 0;
2142                 off = JT(p)->offset - (p->offset + slen) - 1;
2143                 if (off >= 256) {
2144                     /* offset too large for branch, must add a jump */
2145                     if (p->longjt == 0) {
2146                         /* mark this instruction and retry */
2147                         p->longjt++;
2148                         return(0);
2149                     }
2150                     /* branch if T to following jump */
2151                     dst->jt = extrajmps;
2152                     extrajmps++;
2153                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2154                     dst[extrajmps].k = off - extrajmps;
2155                 }
2156                 else
2157                     dst->jt = off;
2158                 off = JF(p)->offset - (p->offset + slen) - 1;
2159                 if (off >= 256) {
2160                     /* offset too large for branch, must add a jump */
2161                     if (p->longjf == 0) {
2162                         /* mark this instruction and retry */
2163                         p->longjf++;
2164                         return(0);
2165                     }
2166                     /* branch if F to following jump */
2167                     /* if two jumps are inserted, F goes to second one */
2168                     dst->jf = extrajmps;
2169                     extrajmps++;
2170                     dst[extrajmps].code = BPF_JMP|BPF_JA;
2171                     dst[extrajmps].k = off - extrajmps;
2172                 }
2173                 else
2174                     dst->jf = off;
2175         }
2176         return (1);
2177 }
2178
2179
2180 /*
2181  * Convert flowgraph intermediate representation to the
2182  * BPF array representation.  Set *lenp to the number of instructions.
2183  *
2184  * This routine does *NOT* leak the memory pointed to by fp.  It *must
2185  * not* do free(fp) before returning fp; doing so would make no sense,
2186  * as the BPF array pointed to by the return value of icode_to_fcode()
2187  * must be valid - it's being returned for use in a bpf_program structure.
2188  *
2189  * If it appears that icode_to_fcode() is leaking, the problem is that
2190  * the program using pcap_compile() is failing to free the memory in
2191  * the BPF program when it's done - the leak is in the program, not in
2192  * the routine that happens to be allocating the memory.  (By analogy, if
2193  * a program calls fopen() without ever calling fclose() on the FILE *,
2194  * it will leak the FILE structure; the leak is not in fopen(), it's in
2195  * the program.)  Change the program to use pcap_freecode() when it's
2196  * done with the filter program.  See the pcap man page.
2197  */
2198 struct bpf_insn *
2199 icode_to_fcode(compiler_state_t *cstate, struct icode *ic,
2200     struct block *root, u_int *lenp)
2201 {
2202         u_int n;
2203         struct bpf_insn *fp;
2204         conv_state_t conv_state;
2205
2206         /*
2207          * Loop doing convert_code_r() until no branches remain
2208          * with too-large offsets.
2209          */
2210         while (1) {
2211             unMarkAll(ic);
2212             n = *lenp = count_stmts(ic, root);
2213
2214             fp = (struct bpf_insn *)malloc(sizeof(*fp) * n);
2215             if (fp == NULL)
2216                     bpf_error(cstate, "malloc");
2217             memset((char *)fp, 0, sizeof(*fp) * n);
2218             conv_state.fstart = fp;
2219             conv_state.ftail = fp + n;
2220
2221             unMarkAll(ic);
2222             if (convert_code_r(cstate, &conv_state, ic, root))
2223                 break;
2224             free(fp);
2225         }
2226
2227         return fp;
2228 }
2229
2230 /*
2231  * Make a copy of a BPF program and put it in the "fcode" member of
2232  * a "pcap_t".
2233  *
2234  * If we fail to allocate memory for the copy, fill in the "errbuf"
2235  * member of the "pcap_t" with an error message, and return -1;
2236  * otherwise, return 0.
2237  */
2238 int
2239 install_bpf_program(pcap_t *p, struct bpf_program *fp)
2240 {
2241         size_t prog_size;
2242
2243         /*
2244          * Validate the program.
2245          */
2246         if (!bpf_validate(fp->bf_insns, fp->bf_len)) {
2247                 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2248                         "BPF program is not valid");
2249                 return (-1);
2250         }
2251
2252         /*
2253          * Free up any already installed program.
2254          */
2255         pcap_freecode(&p->fcode);
2256
2257         prog_size = sizeof(*fp->bf_insns) * fp->bf_len;
2258         p->fcode.bf_len = fp->bf_len;
2259         p->fcode.bf_insns = (struct bpf_insn *)malloc(prog_size);
2260         if (p->fcode.bf_insns == NULL) {
2261                 pcap_snprintf(p->errbuf, sizeof(p->errbuf),
2262                          "malloc: %s", pcap_strerror(errno));
2263                 return (-1);
2264         }
2265         memcpy(p->fcode.bf_insns, fp->bf_insns, prog_size);
2266         return (0);
2267 }
2268
2269 #ifdef BDEBUG
2270 static void
2271 dot_dump_node(struct icode *ic, struct block *block, struct bpf_program *prog,
2272     FILE *out)
2273 {
2274         int icount, noffset;
2275         int i;
2276
2277         if (block == NULL || isMarked(ic, block))
2278                 return;
2279         Mark(ic, block);
2280
2281         icount = slength(block->stmts) + 1 + block->longjt + block->longjf;
2282         noffset = min(block->offset + icount, (int)prog->bf_len);
2283
2284         fprintf(out, "\tblock%d [shape=ellipse, id=\"block-%d\" label=\"BLOCK%d\\n", block->id, block->id, block->id);
2285         for (i = block->offset; i < noffset; i++) {
2286                 fprintf(out, "\\n%s", bpf_image(prog->bf_insns + i, i));
2287         }
2288         fprintf(out, "\" tooltip=\"");
2289         for (i = 0; i < BPF_MEMWORDS; i++)
2290                 if (block->val[i] != 0)
2291                         fprintf(out, "val[%d]=%d ", i, block->val[i]);
2292         fprintf(out, "val[A]=%d ", block->val[A_ATOM]);
2293         fprintf(out, "val[X]=%d", block->val[X_ATOM]);
2294         fprintf(out, "\"");
2295         if (JT(block) == NULL)
2296                 fprintf(out, ", peripheries=2");
2297         fprintf(out, "];\n");
2298
2299         dot_dump_node(ic, JT(block), prog, out);
2300         dot_dump_node(ic, JF(block), prog, out);
2301 }
2302
2303 static void
2304 dot_dump_edge(struct icode *ic, struct block *block, FILE *out)
2305 {
2306         if (block == NULL || isMarked(ic, block))
2307                 return;
2308         Mark(ic, block);
2309
2310         if (JT(block)) {
2311                 fprintf(out, "\t\"block%d\":se -> \"block%d\":n [label=\"T\"]; \n",
2312                                 block->id, JT(block)->id);
2313                 fprintf(out, "\t\"block%d\":sw -> \"block%d\":n [label=\"F\"]; \n",
2314                            block->id, JF(block)->id);
2315         }
2316         dot_dump_edge(ic, JT(block), out);
2317         dot_dump_edge(ic, JF(block), out);
2318 }
2319
2320 /* Output the block CFG using graphviz/DOT language
2321  * In the CFG, block's code, value index for each registers at EXIT,
2322  * and the jump relationship is show.
2323  *
2324  * example DOT for BPF `ip src host 1.1.1.1' is:
2325     digraph BPF {
2326         block0 [shape=ellipse, id="block-0" label="BLOCK0\n\n(000) ldh      [12]\n(001) jeq      #0x800           jt 2  jf 5" tooltip="val[A]=0 val[X]=0"];
2327         block1 [shape=ellipse, id="block-1" label="BLOCK1\n\n(002) ld       [26]\n(003) jeq      #0x1010101       jt 4  jf 5" tooltip="val[A]=0 val[X]=0"];
2328         block2 [shape=ellipse, id="block-2" label="BLOCK2\n\n(004) ret      #68" tooltip="val[A]=0 val[X]=0", peripheries=2];
2329         block3 [shape=ellipse, id="block-3" label="BLOCK3\n\n(005) ret      #0" tooltip="val[A]=0 val[X]=0", peripheries=2];
2330         "block0":se -> "block1":n [label="T"];
2331         "block0":sw -> "block3":n [label="F"];
2332         "block1":se -> "block2":n [label="T"];
2333         "block1":sw -> "block3":n [label="F"];
2334     }
2335  *
2336  *  After install graphviz on http://www.graphviz.org/, save it as bpf.dot
2337  *  and run `dot -Tpng -O bpf.dot' to draw the graph.
2338  */
2339 static void
2340 dot_dump(compiler_state_t *cstate, struct icode *ic)
2341 {
2342         struct bpf_program f;
2343         FILE *out = stdout;
2344
2345         memset(bids, 0, sizeof bids);
2346         f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2347
2348         fprintf(out, "digraph BPF {\n");
2349         ic->cur_mark = 0;
2350         unMarkAll(ic);
2351         dot_dump_node(ic, ic->root, &f, out);
2352         ic->cur_mark = 0;
2353         unMarkAll(ic);
2354         dot_dump_edge(ic, ic->root, out);
2355         fprintf(out, "}\n");
2356
2357         free((char *)f.bf_insns);
2358 }
2359
2360 static void
2361 plain_dump(compiler_state_t *cstate, struct icode *ic)
2362 {
2363         struct bpf_program f;
2364
2365         memset(bids, 0, sizeof bids);
2366         f.bf_insns = icode_to_fcode(cstate, ic, ic->root, &f.bf_len);
2367         bpf_dump(&f, 1);
2368         putchar('\n');
2369         free((char *)f.bf_insns);
2370 }
2371
2372 static void
2373 opt_dump(compiler_state_t *cstate, struct icode *ic)
2374 {
2375         /* if optimizer debugging is enabled, output DOT graph
2376          * `pcap_optimizer_debug=4' is equivalent to -dddd to follow -d/-dd/-ddd
2377          * convention in tcpdump command line
2378          */
2379         if (pcap_optimizer_debug > 3)
2380                 dot_dump(cstate, ic);
2381         else
2382                 plain_dump(cstate, ic);
2383 }
2384 #endif