gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
/* The infinite cost.  A comp_cost whose cost field equals this value is
   treated as infinite by comp_cost::infinite_cost_p.  */
#define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  struct iv_use *nonlin_use;    /* The use recorded for this iv, if there is
                                   one.  NOTE(review): presumably the
                                   nonlinear-expression use, judging by the
                                   field name -- confirm against users.  */
  bool biv_p;           /* Is it a biv?  */
  bool no_overflow;     /* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
                           type use.  */
};
 153
/* Per-ssa version information (induction variable descriptions, etc.).
   Entries live in the ivopts_data::version_info array and are looked up
   by ssa version number (see ver_info and name_info).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
 164
/* Types of uses.  All uses within one iv_group share the same type.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};
 172
/* Cost of a computation.  A cost is considered infinite when its COST
   field equals INFTY; costs are ordered by the COST field first and by
   the COMPLEXITY field second.  */
struct comp_cost
{
  /* Constructs the zero cost.  */
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  /* Constructs a cost from its components; SCRATCH defaults to zero.  */
  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  The result is infinite if either
     operand is infinite.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  An infinite cost is left
     unchanged.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  An infinite cost is left
     unchanged.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  An infinite cost is left
     unchanged.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  An infinite cost is left
     unchanged.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts cost COST2 from COST1.  COST2 must not be infinite unless
     COST1 is infinite as well.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
  int scratch;          /* Scratch used during cost computation.  */
};
 226
/* The zero cost (default constructed) and the infinite cost (all fields
   set to INFTY).  */
static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
struct iv_inv_expr_ent;

/* The candidate - cost pair.  Entries of this type make up the cost_map
   of an iv_group.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
};
 357
/* A single use of an induction variable.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  unsigned group_id;    /* The group id the use belongs to.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple *stmt;         /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  May be NULL (dump_use
                           checks for that).  */

  tree addr_base;       /* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
                        /* Const offset stripped from base address.  */
};
 372
/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs with respect to the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
 391
/* The position where the iv is computed (i.e. incremented).  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
 401
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the use
                              at which the candidate is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
  struct iv *orig_iv;   /* The original iv if this cand is added from biv with
                           smaller type.  */
};
 426
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  /* Base and step of the common candidate; together with the cached hash
     they determine equality in iv_common_cand_hasher::equal.  */
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  /* Cached hash value, returned as is by iv_common_cand_hasher::hash.  */
  hashval_t hash;
};
 436
/* Hashtable helpers: hashing and equality of iv_common_cand entries for
   the table of common candidates.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
 444
 445 /* Hash function for possible common candidates.  */
 446
 447 inline hashval_t
 448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 449 {
 450   return ccand->hash;
 451 }
 452
 453 /* Hash table equality function for common candidates.  */
 454
 455 inline bool
 456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 457                               const iv_common_cand *ccand2)
 458 {
 459   return (ccand1->hash == ccand2->hash
 460           && operand_equal_p (ccand1->base, ccand2->base, 0)
 461           && operand_equal_p (ccand1->step, ccand2->step, 0)
 462           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 463               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 464 }
 465
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};
 477
 478 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 479
 480 static int
 481 sort_iv_inv_expr_ent (const void *a, const void *b)
 482 {
 483   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 484   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 485
 486   unsigned id1 = (*e1)->id;
 487   unsigned id2 = (*e2)->id;
 488
 489   if (id1 < id2)
 490     return -1;
 491   else if (id1 > id2)
 492     return 1;
 493   else
 494     return 0;
 495 }
 496
/* Hashtable helpers: hashing and equality of iv_inv_expr_ent entries for
   the table of loop invariant expressions.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
 504
 505 /* Hash function for loop invariant expressions.  */
 506
 507 inline hashval_t
 508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 509 {
 510   return expr->hash;
 511 }
 512
 513 /* Hash table equality function for expressions.  */
 514
 515 inline bool
 516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 517                            const iv_inv_expr_ent *expr2)
 518 {
 519   return expr1->hash == expr2->hash
 520          && operand_equal_p (expr1->expr, expr2->expr, 0);
 521 }
 522
/* Data used by the induction variable optimizations while a loop is being
   processed.  */

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  /* NOTE(review): presumably the source location associated with
     CURRENT_LOOP -- confirm against the code that sets it.  */
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  Allocated
     lazily by niter_for_exit; a NULL entry records that the number of
     iterations could not be determined for that exit.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int max_inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables, organized in groups.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  NOTE(review): the field name suggests the opposite polarity (true
     meaning that all candidates are considered) -- confirm against the
     users of this flag.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
 591
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Hash map keyed by the used invariant expressions.  NOTE(review): the
     unsigned value is presumably the number of uses of the expression --
     confirm against the code maintaining the map.  */
  hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
 632
/* Difference of two iv candidate assignments, represented as a singly
   linked list of per-group changes.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
 649
/* Bound on the number of candidates below which all candidates are
   considered.  Taken from PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  Taken from
   PARAM_IV_MAX_CONSIDERED_USES.  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  Taken from
   PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 666
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; the definition is further down in this file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
 673
 674 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 675
 676 edge
 677 single_dom_exit (struct loop *loop)
 678 {
 679   edge exit = single_exit (loop);
 680
 681   if (!exit)
 682     return NULL;
 683
 684   if (!just_once_each_iteration_p (loop, exit->src))
 685     return NULL;
 686
 687   return exit;
 688 }
 689
 690 /* Dumps information about the induction variable IV to FILE.  Don't dump
 691    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 692    preceding spaces indicated by INDENT_LEVEL.  */
 693
 694 void
 695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 696 {
 697   const char *p;
 698   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 699
 700   if (indent_level > 4)
 701     indent_level = 4;
 702   p = spaces + 8 - (indent_level << 1);
 703
 704   fprintf (file, "%sIV struct:\n", p);
 705   if (iv->ssa_name && dump_name)
 706     {
 707       fprintf (file, "%s  SSA_NAME:\t", p);
 708       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 709       fprintf (file, "\n");
 710     }
 711
 712   fprintf (file, "%s  Type:\t", p);
 713   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 714   fprintf (file, "\n");
 715
 716   fprintf (file, "%s  Base:\t", p);
 717   print_generic_expr (file, iv->base, TDF_SLIM);
 718   fprintf (file, "\n");
 719
 720   fprintf (file, "%s  Step:\t", p);
 721   print_generic_expr (file, iv->step, TDF_SLIM);
 722   fprintf (file, "\n");
 723
 724   if (iv->base_object)
 725     {
 726       fprintf (file, "%s  Object:\t", p);
 727       print_generic_expr (file, iv->base_object, TDF_SLIM);
 728       fprintf (file, "\n");
 729     }
 730
 731   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 732
 733   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 734            p, iv->no_overflow ? "No-overflow" : "Overflow");
 735 }
 736
 737 /* Dumps information about the USE to FILE.  */
 738
 739 void
 740 dump_use (FILE *file, struct iv_use *use)
 741 {
 742   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 743   fprintf (file, "    At stmt:\t");
 744   print_gimple_stmt (file, use->stmt, 0, 0);
 745   fprintf (file, "    At pos:\t");
 746   if (use->op_p)
 747     print_generic_expr (file, *use->op_p, TDF_SLIM);
 748   fprintf (file, "\n");
 749   dump_iv (file, use->iv, false, 2);
 750 }
 751
 752 /* Dumps information about the uses to FILE.  */
 753
 754 void
 755 dump_groups (FILE *file, struct ivopts_data *data)
 756 {
 757   unsigned i, j;
 758   struct iv_group *group;
 759
 760   for (i = 0; i < data->vgroups.length (); i++)
 761     {
 762       group = data->vgroups[i];
 763       fprintf (file, "Group %d:\n", group->id);
 764       if (group->type == USE_NONLINEAR_EXPR)
 765         fprintf (file, "  Type:\tGENERIC\n");
 766       else if (group->type == USE_ADDRESS)
 767         fprintf (file, "  Type:\tADDRESS\n");
 768       else
 769         {
 770           gcc_assert (group->type == USE_COMPARE);
 771           fprintf (file, "  Type:\tCOMPARE\n");
 772         }
 773       for (j = 0; j < group->vuses.length (); j++)
 774         dump_use (file, group->vuses[j]);
 775     }
 776 }
 777
 778 /* Dumps information about induction variable candidate CAND to FILE.  */
 779
 780 void
 781 dump_cand (FILE *file, struct iv_cand *cand)
 782 {
 783   struct iv *iv = cand->iv;
 784
 785   fprintf (file, "Candidate %d:\n", cand->id);
 786   if (cand->depends_on)
 787     {
 788       fprintf (file, "  Depend on: ");
 789       dump_bitmap (file, cand->depends_on);
 790     }
 791
 792   if (cand->var_before)
 793     {
 794       fprintf (file, "  Var befor: ");
 795       print_generic_expr (file, cand->var_before, TDF_SLIM);
 796       fprintf (file, "\n");
 797     }
 798   if (cand->var_after)
 799     {
 800       fprintf (file, "  Var after: ");
 801       print_generic_expr (file, cand->var_after, TDF_SLIM);
 802       fprintf (file, "\n");
 803     }
 804
 805   switch (cand->pos)
 806     {
 807     case IP_NORMAL:
 808       fprintf (file, "  Incr POS: before exit test\n");
 809       break;
 810
 811     case IP_BEFORE_USE:
 812       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 813       break;
 814
 815     case IP_AFTER_USE:
 816       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 817       break;
 818
 819     case IP_END:
 820       fprintf (file, "  Incr POS: at end\n");
 821       break;
 822
 823     case IP_ORIGINAL:
 824       fprintf (file, "  Incr POS: orig biv\n");
 825       break;
 826     }
 827
 828   dump_iv (file, iv, false, 1);
 829 }
 830
 831 /* Returns the info for ssa version VER.  */
 832
 833 static inline struct version_info *
 834 ver_info (struct ivopts_data *data, unsigned ver)
 835 {
 836   return data->version_info + ver;
 837 }
 838
 839 /* Returns the info for ssa name NAME.  */
 840
 841 static inline struct version_info *
 842 name_info (struct ivopts_data *data, tree name)
 843 {
 844   return ver_info (data, SSA_NAME_VERSION (name));
 845 }
 846
 847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 848    emitted in LOOP.  */
 849
 850 static bool
 851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 852 {
 853   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 854
 855   gcc_assert (bb);
 856
 857   if (sbb == loop->latch)
 858     return true;
 859
 860   if (sbb != bb)
 861     return false;
 862
 863   return stmt == last_stmt (bb);
 864 }
 865
 866 /* Returns true if STMT if after the place where the original induction
 867    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 868    if the positions are identical.  */
 869
 870 static bool
 871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 872 {
 873   basic_block cand_bb = gimple_bb (cand->incremented_at);
 874   basic_block stmt_bb = gimple_bb (stmt);
 875
 876   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 877     return false;
 878
 879   if (stmt_bb != cand_bb)
 880     return true;
 881
 882   if (true_if_equal
 883       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 884     return true;
 885   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 886 }
 887
 888 /* Returns true if STMT if after the place where the induction variable
 889    CAND is incremented in LOOP.  */
 890
 891 static bool
 892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 893 {
 894   switch (cand->pos)
 895     {
 896     case IP_END:
 897       return false;
 898
 899     case IP_NORMAL:
 900       return stmt_after_ip_normal_pos (loop, stmt);
 901
 902     case IP_ORIGINAL:
 903     case IP_AFTER_USE:
 904       return stmt_after_inc_pos (cand, stmt, false);
 905
 906     case IP_BEFORE_USE:
 907       return stmt_after_inc_pos (cand, stmt, true);
 908
 909     default:
 910       gcc_unreachable ();
 911     }
 912 }
 913
 914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 915
 916 static bool
 917 abnormal_ssa_name_p (tree exp)
 918 {
 919   if (!exp)
 920     return false;
 921
 922   if (TREE_CODE (exp) != SSA_NAME)
 923     return false;
 924
 925   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 926 }
 927
 928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 929    abnormal phi node.  Callback for for_each_index.  */
 930
 931 static bool
 932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 933                                   void *data ATTRIBUTE_UNUSED)
 934 {
 935   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 936     {
 937       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 938         return false;
 939       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 940         return false;
 941     }
 942
 943   return !abnormal_ssa_name_p (*index);
 944 }
 945
 946 /* Returns true if EXPR contains a ssa name that occurs in an
 947    abnormal phi node.  */
 948
 949 bool
 950 contains_abnormal_ssa_name_p (tree expr)
 951 {
 952   enum tree_code code;
 953   enum tree_code_class codeclass;
 954
 955   if (!expr)
 956     return false;
 957
 958   code = TREE_CODE (expr);
 959   codeclass = TREE_CODE_CLASS (code);
 960
 961   if (code == SSA_NAME)
 962     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 963
 964   if (code == INTEGER_CST
 965       || is_gimple_min_invariant (expr))
 966     return false;
 967
 968   if (code == ADDR_EXPR)
 969     return !for_each_index (&TREE_OPERAND (expr, 0),
 970                             idx_contains_abnormal_ssa_name_p,
 971                             NULL);
 972
 973   if (code == COND_EXPR)
 974     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 975       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 976       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 977
 978   switch (codeclass)
 979     {
 980     case tcc_binary:
 981     case tcc_comparison:
 982       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 983         return true;
 984
 985       /* Fallthru.  */
 986     case tcc_unary:
 987       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 988         return true;
 989
 990       break;
 991
 992     default:
 993       gcc_unreachable ();
 994     }
 995
 996   return false;
 997 }
 998
 999 /*  Returns the structure describing number of iterations determined from
1000     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1001
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1004 {
1005   struct tree_niter_desc *desc;
1006   tree_niter_desc **slot;
1007
1008   if (!data->niters)
1009     {
1010       data->niters = new hash_map<edge, tree_niter_desc *>;
1011       slot = NULL;
1012     }
1013   else
1014     slot = data->niters->get (exit);
1015
1016   if (!slot)
1017     {
1018       /* Try to determine number of iterations.  We cannot safely work with ssa
1019          names that appear in phi nodes on abnormal edges, so that we do not
1020          create overlapping life ranges for them (PR 27283).  */
1021       desc = XNEW (struct tree_niter_desc);
1022       if (!number_of_iterations_exit (data->current_loop,
1023                                       exit, desc, true)
1024           || contains_abnormal_ssa_name_p (desc->niter))
1025         {
1026           XDELETE (desc);
1027           desc = NULL;
1028         }
1029       data->niters->put (exit, desc);
1030     }
1031   else
1032     desc = *slot;
1033
1034   return desc;
1035 }
1036
1037 /* Returns the structure describing number of iterations determined from
1038    single dominating exit of DATA->current_loop, or NULL if something
1039    goes wrong.  */
1040
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1043 {
1044   edge exit = single_dom_exit (data->current_loop);
1045
1046   if (!exit)
1047     return NULL;
1048
1049   return niter_for_exit (data, exit);
1050 }
1051
1052 /* Initializes data structures used by the iv optimization pass, stored
1053    in DATA.  */
1054
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1057 {
1058   data->version_info_size = 2 * num_ssa_names;
1059   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060   data->relevant = BITMAP_ALLOC (NULL);
1061   data->important_candidates = BITMAP_ALLOC (NULL);
1062   data->max_inv_id = 0;
1063   data->niters = NULL;
1064   data->vgroups.create (20);
1065   data->vcands.create (20);
1066   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067   data->max_inv_expr_id = 0;
1068   data->name_expansion_cache = NULL;
1069   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070   data->iv_common_cands.create (20);
1071   decl_rtl_to_reset.create (20);
1072   gcc_obstack_init (&data->iv_obstack);
1073 }
1074
1075 /* Returns a memory object to that EXPR points.  In case we are able to
1076    determine that it does not point to any such object, NULL is returned.  */
1077
1078 static tree
1079 determine_base_object (tree expr)
1080 {
1081   enum tree_code code = TREE_CODE (expr);
1082   tree base, obj;
1083
1084   /* If this is a pointer casted to any type, we need to determine
1085      the base object for the pointer; so handle conversions before
1086      throwing away non-pointer expressions.  */
1087   if (CONVERT_EXPR_P (expr))
1088     return determine_base_object (TREE_OPERAND (expr, 0));
1089
1090   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091     return NULL_TREE;
1092
1093   switch (code)
1094     {
1095     case INTEGER_CST:
1096       return NULL_TREE;
1097
1098     case ADDR_EXPR:
1099       obj = TREE_OPERAND (expr, 0);
1100       base = get_base_address (obj);
1101
1102       if (!base)
1103         return expr;
1104
1105       if (TREE_CODE (base) == MEM_REF)
1106         return determine_base_object (TREE_OPERAND (base, 0));
1107
1108       return fold_convert (ptr_type_node,
1109                            build_fold_addr_expr (base));
1110
1111     case POINTER_PLUS_EXPR:
1112       return determine_base_object (TREE_OPERAND (expr, 0));
1113
1114     case PLUS_EXPR:
1115     case MINUS_EXPR:
1116       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1117       gcc_unreachable ();
1118
1119     default:
1120       return fold_convert (ptr_type_node, expr);
1121     }
1122 }
1123
1124 /* Return true if address expression with non-DECL_P operand appears
1125    in EXPR.  */
1126
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1129 {
1130   bool res = false;
1131
1132   STRIP_NOPS (expr);
1133   switch (TREE_CODE (expr))
1134     {
1135     case POINTER_PLUS_EXPR:
1136     case PLUS_EXPR:
1137     case MINUS_EXPR:
1138       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140       break;
1141
1142     case ADDR_EXPR:
1143       return (!DECL_P (TREE_OPERAND (expr, 0)));
1144
1145     default:
1146       return false;
1147     }
1148
1149   return res;
1150 }
1151
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1154
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157           bool no_overflow = false)
1158 {
1159   tree expr = base;
1160   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161                                               sizeof (struct iv));
1162   gcc_assert (step != NULL_TREE);
1163
1164   /* Lower address expression in base except ones with DECL_P as operand.
1165      By doing this:
1166        1) More accurate cost can be computed for address expressions;
1167        2) Duplicate candidates won't be created for bases in different
1168           forms, like &a[0] and &a.  */
1169   STRIP_NOPS (expr);
1170   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171       || contain_complex_addr_expr (expr))
1172     {
1173       aff_tree comb;
1174       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1176     }
1177
1178   iv->base = base;
1179   iv->base_object = determine_base_object (base);
1180   iv->step = step;
1181   iv->biv_p = false;
1182   iv->nonlin_use = NULL;
1183   iv->ssa_name = NULL_TREE;
1184   if (!no_overflow
1185        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1186                               base, step))
1187     no_overflow = true;
1188   iv->no_overflow = no_overflow;
1189   iv->have_address_use = false;
1190
1191   return iv;
1192 }
1193
1194 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1195    doesn't overflow.  */
1196
1197 static void
1198 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1199         bool no_overflow)
1200 {
1201   struct version_info *info = name_info (data, iv);
1202
1203   gcc_assert (!info->iv);
1204
1205   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1206   info->iv = alloc_iv (data, base, step, no_overflow);
1207   info->iv->ssa_name = iv;
1208 }
1209
1210 /* Finds induction variable declaration for VAR.  */
1211
1212 static struct iv *
1213 get_iv (struct ivopts_data *data, tree var)
1214 {
1215   basic_block bb;
1216   tree type = TREE_TYPE (var);
1217
1218   if (!POINTER_TYPE_P (type)
1219       && !INTEGRAL_TYPE_P (type))
1220     return NULL;
1221
1222   if (!name_info (data, var)->iv)
1223     {
1224       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1225
1226       if (!bb
1227           || !flow_bb_inside_loop_p (data->current_loop, bb))
1228         set_iv (data, var, var, build_int_cst (type, 0), true);
1229     }
1230
1231   return name_info (data, var)->iv;
1232 }
1233
1234 /* Return the first non-invariant ssa var found in EXPR.  */
1235
1236 static tree
1237 extract_single_var_from_expr (tree expr)
1238 {
1239   int i, n;
1240   tree tmp;
1241   enum tree_code code;
1242
1243   if (!expr || is_gimple_min_invariant (expr))
1244     return NULL;
1245
1246   code = TREE_CODE (expr);
1247   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1248     {
1249       n = TREE_OPERAND_LENGTH (expr);
1250       for (i = 0; i < n; i++)
1251         {
1252           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1253
1254           if (tmp)
1255             return tmp;
1256         }
1257     }
1258   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1259 }
1260
1261 /* Finds basic ivs.  */
1262
1263 static bool
1264 find_bivs (struct ivopts_data *data)
1265 {
1266   gphi *phi;
1267   affine_iv iv;
1268   tree step, type, base, stop;
1269   bool found = false;
1270   struct loop *loop = data->current_loop;
1271   gphi_iterator psi;
1272
1273   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1274     {
1275       phi = psi.phi ();
1276
1277       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1278         continue;
1279
1280       if (virtual_operand_p (PHI_RESULT (phi)))
1281         continue;
1282
1283       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1284         continue;
1285
1286       if (integer_zerop (iv.step))
1287         continue;
1288
1289       step = iv.step;
1290       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1291       /* Stop expanding iv base at the first ssa var referred by iv step.
1292          Ideally we should stop at any ssa var, because that's expensive
1293          and unusual to happen, we just do it on the first one.
1294
1295          See PR64705 for the rationale.  */
1296       stop = extract_single_var_from_expr (step);
1297       base = expand_simple_operations (base, stop);
1298       if (contains_abnormal_ssa_name_p (base)
1299           || contains_abnormal_ssa_name_p (step))
1300         continue;
1301
1302       type = TREE_TYPE (PHI_RESULT (phi));
1303       base = fold_convert (type, base);
1304       if (step)
1305         {
1306           if (POINTER_TYPE_P (type))
1307             step = convert_to_ptrofftype (step);
1308           else
1309             step = fold_convert (type, step);
1310         }
1311
1312       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1313       found = true;
1314     }
1315
1316   return found;
1317 }
1318
1319 /* Marks basic ivs.  */
1320
1321 static void
1322 mark_bivs (struct ivopts_data *data)
1323 {
1324   gphi *phi;
1325   gimple *def;
1326   tree var;
1327   struct iv *iv, *incr_iv;
1328   struct loop *loop = data->current_loop;
1329   basic_block incr_bb;
1330   gphi_iterator psi;
1331
1332   data->bivs_not_used_in_addr = 0;
1333   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1334     {
1335       phi = psi.phi ();
1336
1337       iv = get_iv (data, PHI_RESULT (phi));
1338       if (!iv)
1339         continue;
1340
1341       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1342       def = SSA_NAME_DEF_STMT (var);
1343       /* Don't mark iv peeled from other one as biv.  */
1344       if (def
1345           && gimple_code (def) == GIMPLE_PHI
1346           && gimple_bb (def) == loop->header)
1347         continue;
1348
1349       incr_iv = get_iv (data, var);
1350       if (!incr_iv)
1351         continue;
1352
1353       /* If the increment is in the subloop, ignore it.  */
1354       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1355       if (incr_bb->loop_father != data->current_loop
1356           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1357         continue;
1358
1359       iv->biv_p = true;
1360       incr_iv->biv_p = true;
1361       if (iv->no_overflow)
1362         data->bivs_not_used_in_addr++;
1363       if (incr_iv->no_overflow)
1364         data->bivs_not_used_in_addr++;
1365     }
1366 }
1367
1368 /* Checks whether STMT defines a linear induction variable and stores its
1369    parameters to IV.  */
1370
1371 static bool
1372 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1373 {
1374   tree lhs, stop;
1375   struct loop *loop = data->current_loop;
1376
1377   iv->base = NULL_TREE;
1378   iv->step = NULL_TREE;
1379
1380   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1381     return false;
1382
1383   lhs = gimple_assign_lhs (stmt);
1384   if (TREE_CODE (lhs) != SSA_NAME)
1385     return false;
1386
1387   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1388     return false;
1389
1390   /* Stop expanding iv base at the first ssa var referred by iv step.
1391      Ideally we should stop at any ssa var, because that's expensive
1392      and unusual to happen, we just do it on the first one.
1393
1394      See PR64705 for the rationale.  */
1395   stop = extract_single_var_from_expr (iv->step);
1396   iv->base = expand_simple_operations (iv->base, stop);
1397   if (contains_abnormal_ssa_name_p (iv->base)
1398       || contains_abnormal_ssa_name_p (iv->step))
1399     return false;
1400
1401   /* If STMT could throw, then do not consider STMT as defining a GIV.
1402      While this will suppress optimizations, we can not safely delete this
1403      GIV and associated statements, even if it appears it is not used.  */
1404   if (stmt_could_throw_p (stmt))
1405     return false;
1406
1407   return true;
1408 }
1409
1410 /* Finds general ivs in statement STMT.  */
1411
1412 static void
1413 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1414 {
1415   affine_iv iv;
1416
1417   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1418     return;
1419
1420   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1421 }
1422
1423 /* Finds general ivs in basic block BB.  */
1424
1425 static void
1426 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1427 {
1428   gimple_stmt_iterator bsi;
1429
1430   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1431     find_givs_in_stmt (data, gsi_stmt (bsi));
1432 }
1433
1434 /* Finds general ivs.  */
1435
1436 static void
1437 find_givs (struct ivopts_data *data)
1438 {
1439   struct loop *loop = data->current_loop;
1440   basic_block *body = get_loop_body_in_dom_order (loop);
1441   unsigned i;
1442
1443   for (i = 0; i < loop->num_nodes; i++)
1444     find_givs_in_bb (data, body[i]);
1445   free (body);
1446 }
1447
1448 /* For each ssa name defined in LOOP determines whether it is an induction
1449    variable and if so, its initial value and step.  */
1450
1451 static bool
1452 find_induction_variables (struct ivopts_data *data)
1453 {
1454   unsigned i;
1455   bitmap_iterator bi;
1456
1457   if (!find_bivs (data))
1458     return false;
1459
1460   find_givs (data);
1461   mark_bivs (data);
1462
1463   if (dump_file && (dump_flags & TDF_DETAILS))
1464     {
1465       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1466
1467       if (niter)
1468         {
1469           fprintf (dump_file, "  number of iterations ");
1470           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1471           if (!integer_zerop (niter->may_be_zero))
1472             {
1473               fprintf (dump_file, "; zero if ");
1474               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1475             }
1476           fprintf (dump_file, "\n");
1477         };
1478
1479       fprintf (dump_file, "\n<Induction Vars>:\n");
1480       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1481         {
1482           struct version_info *info = ver_info (data, i);
1483           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1484             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1485         }
1486     }
1487
1488   return true;
1489 }
1490
1491 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1492    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1493    is the const offset stripped from IV base; for other types use, both
1494    are zero by default.  */
1495
1496 static struct iv_use *
1497 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1498             gimple *stmt, enum use_type type, tree addr_base,
1499             unsigned HOST_WIDE_INT addr_offset)
1500 {
1501   struct iv_use *use = XCNEW (struct iv_use);
1502
1503   use->id = group->vuses.length ();
1504   use->group_id = group->id;
1505   use->type = type;
1506   use->iv = iv;
1507   use->stmt = stmt;
1508   use->op_p = use_p;
1509   use->addr_base = addr_base;
1510   use->addr_offset = addr_offset;
1511
1512   group->vuses.safe_push (use);
1513   return use;
1514 }
1515
1516 /* Checks whether OP is a loop-level invariant and if so, records it.
1517    NONLINEAR_USE is true if the invariant is used in a way we do not
1518    handle specially.  */
1519
1520 static void
1521 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1522 {
1523   basic_block bb;
1524   struct version_info *info;
1525
1526   if (TREE_CODE (op) != SSA_NAME
1527       || virtual_operand_p (op))
1528     return;
1529
1530   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1531   if (bb
1532       && flow_bb_inside_loop_p (data->current_loop, bb))
1533     return;
1534
1535   info = name_info (data, op);
1536   info->name = op;
1537   info->has_nonlin_use |= nonlinear_use;
1538   if (!info->inv_id)
1539     info->inv_id = ++data->max_inv_id;
1540   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1541 }
1542
1543 static tree
1544 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1545
1546 /* Record a group of TYPE.  */
1547
1548 static struct iv_group *
1549 record_group (struct ivopts_data *data, enum use_type type)
1550 {
1551   struct iv_group *group = XCNEW (struct iv_group);
1552
1553   group->id = data->vgroups.length ();
1554   group->type = type;
1555   group->related_cands = BITMAP_ALLOC (NULL);
1556   group->vuses.create (1);
1557
1558   data->vgroups.safe_push (group);
1559   return group;
1560 }
1561
1562 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1563    New group will be created if there is no existing group for the use.  */
1564
1565 static struct iv_use *
1566 record_group_use (struct ivopts_data *data, tree *use_p,
1567                   struct iv *iv, gimple *stmt, enum use_type type)
1568 {
1569   tree addr_base = NULL;
1570   struct iv_group *group = NULL;
1571   unsigned HOST_WIDE_INT addr_offset = 0;
1572
1573   /* Record non address type use in a new group.  */
1574   if (type == USE_ADDRESS && iv->base_object)
1575     {
1576       unsigned int i;
1577
1578       addr_base = strip_offset (iv->base, &addr_offset);
1579       for (i = 0; i < data->vgroups.length (); i++)
1580         {
1581           struct iv_use *use;
1582
1583           group = data->vgroups[i];
1584           use = group->vuses[0];
1585           if (use->type != USE_ADDRESS || !use->iv->base_object)
1586             continue;
1587
1588           /* Check if it has the same stripped base and step.  */
1589           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1590               && operand_equal_p (iv->step, use->iv->step, 0)
1591               && operand_equal_p (addr_base, use->addr_base, 0))
1592             break;
1593         }
1594       if (i == data->vgroups.length ())
1595         group = NULL;
1596     }
1597
1598   if (!group)
1599     group = record_group (data, type);
1600
1601   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1602 }
1603
1604 /* Checks whether the use OP is interesting and if so, records it.  */
1605
1606 static struct iv_use *
1607 find_interesting_uses_op (struct ivopts_data *data, tree op)
1608 {
1609   struct iv *iv;
1610   gimple *stmt;
1611   struct iv_use *use;
1612
1613   if (TREE_CODE (op) != SSA_NAME)
1614     return NULL;
1615
1616   iv = get_iv (data, op);
1617   if (!iv)
1618     return NULL;
1619
1620   if (iv->nonlin_use)
1621     {
1622       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1623       return iv->nonlin_use;
1624     }
1625
1626   if (integer_zerop (iv->step))
1627     {
1628       record_invariant (data, op, true);
1629       return NULL;
1630     }
1631
1632   stmt = SSA_NAME_DEF_STMT (op);
1633   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1634
1635   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1636   iv->nonlin_use = use;
1637   return use;
1638 }
1639
1640 /* Given a condition in statement STMT, checks whether it is a compare
1641    of an induction variable and an invariant.  If this is the case,
1642    CONTROL_VAR is set to location of the iv, BOUND to the location of
1643    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1644    induction variable descriptions, and true is returned.  If this is not
1645    the case, CONTROL_VAR and BOUND are set to the arguments of the
1646    condition and false is returned.  */
1647
1648 static bool
1649 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1650                        tree **control_var, tree **bound,
1651                        struct iv **iv_var, struct iv **iv_bound)
1652 {
1653   /* The objects returned when COND has constant operands.  */
1654   static struct iv const_iv;
1655   static tree zero;
1656   tree *op0 = &zero, *op1 = &zero;
1657   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1658   bool ret = false;
1659
1660   if (gimple_code (stmt) == GIMPLE_COND)
1661     {
1662       gcond *cond_stmt = as_a <gcond *> (stmt);
1663       op0 = gimple_cond_lhs_ptr (cond_stmt);
1664       op1 = gimple_cond_rhs_ptr (cond_stmt);
1665     }
1666   else
1667     {
1668       op0 = gimple_assign_rhs1_ptr (stmt);
1669       op1 = gimple_assign_rhs2_ptr (stmt);
1670     }
1671
1672   zero = integer_zero_node;
1673   const_iv.step = integer_zero_node;
1674
1675   if (TREE_CODE (*op0) == SSA_NAME)
1676     iv0 = get_iv (data, *op0);
1677   if (TREE_CODE (*op1) == SSA_NAME)
1678     iv1 = get_iv (data, *op1);
1679
1680   /* Exactly one of the compared values must be an iv, and the other one must
1681      be an invariant.  */
1682   if (!iv0 || !iv1)
1683     goto end;
1684
1685   if (integer_zerop (iv0->step))
1686     {
1687       /* Control variable may be on the other side.  */
1688       std::swap (op0, op1);
1689       std::swap (iv0, iv1);
1690     }
1691   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1692
1693 end:
1694   if (control_var)
1695     *control_var = op0;
1696   if (iv_var)
1697     *iv_var = iv0;
1698   if (bound)
1699     *bound = op1;
1700   if (iv_bound)
1701     *iv_bound = iv1;
1702
1703   return ret;
1704 }
1705
1706 /* Checks whether the condition in STMT is interesting and if so,
1707    records it.  */
1708
1709 static void
1710 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1711 {
1712   tree *var_p, *bound_p;
1713   struct iv *var_iv;
1714
1715   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1716     {
1717       find_interesting_uses_op (data, *var_p);
1718       find_interesting_uses_op (data, *bound_p);
1719       return;
1720     }
1721
1722   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1723 }
1724
1725 /* Returns the outermost loop EXPR is obviously invariant in
1726    relative to the loop LOOP, i.e. if all its operands are defined
1727    outside of the returned loop.  Returns NULL if EXPR is not
1728    even obviously invariant in LOOP.  */
1729
1730 struct loop *
1731 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1732 {
1733   basic_block def_bb;
1734   unsigned i, len;
1735
1736   if (is_gimple_min_invariant (expr))
1737     return current_loops->tree_root;
1738
1739   if (TREE_CODE (expr) == SSA_NAME)
1740     {
1741       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1742       if (def_bb)
1743         {
1744           if (flow_bb_inside_loop_p (loop, def_bb))
1745             return NULL;
1746           return superloop_at_depth (loop,
1747                                      loop_depth (def_bb->loop_father) + 1);
1748         }
1749
1750       return current_loops->tree_root;
1751     }
1752
1753   if (!EXPR_P (expr))
1754     return NULL;
1755
1756   unsigned maxdepth = 0;
1757   len = TREE_OPERAND_LENGTH (expr);
1758   for (i = 0; i < len; i++)
1759     {
1760       struct loop *ivloop;
1761       if (!TREE_OPERAND (expr, i))
1762         continue;
1763
1764       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1765       if (!ivloop)
1766         return NULL;
1767       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1768     }
1769
1770   return superloop_at_depth (loop, maxdepth);
1771 }
1772
1773 /* Returns true if expression EXPR is obviously invariant in LOOP,
1774    i.e. if all its operands are defined outside of the LOOP.  LOOP
1775    should not be the function body.  */
1776
1777 bool
1778 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1779 {
1780   basic_block def_bb;
1781   unsigned i, len;
1782
1783   gcc_assert (loop_depth (loop) > 0);
1784
1785   if (is_gimple_min_invariant (expr))
1786     return true;
1787
1788   if (TREE_CODE (expr) == SSA_NAME)
1789     {
1790       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1791       if (def_bb
1792           && flow_bb_inside_loop_p (loop, def_bb))
1793         return false;
1794
1795       return true;
1796     }
1797
1798   if (!EXPR_P (expr))
1799     return false;
1800
1801   len = TREE_OPERAND_LENGTH (expr);
1802   for (i = 0; i < len; i++)
1803     if (TREE_OPERAND (expr, i)
1804         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1805       return false;
1806
1807   return true;
1808 }
1809
1810 /* Given expression EXPR which computes inductive values with respect
1811    to loop recorded in DATA, this function returns biv from which EXPR
1812    is derived by tracing definition chains of ssa variables in EXPR.  */
1813
1814 static struct iv*
1815 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1816 {
1817   struct iv *iv;
1818   unsigned i, n;
1819   tree e2, e1;
1820   enum tree_code code;
1821   gimple *stmt;
1822
1823   if (expr == NULL_TREE)
1824     return NULL;
1825
1826   if (is_gimple_min_invariant (expr))
1827     return NULL;
1828
1829   code = TREE_CODE (expr);
1830   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1831     {
1832       n = TREE_OPERAND_LENGTH (expr);
1833       for (i = 0; i < n; i++)
1834         {
1835           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1836           if (iv)
1837             return iv;
1838         }
1839     }
1840
1841   /* Stop if it's not ssa name.  */
1842   if (code != SSA_NAME)
1843     return NULL;
1844
1845   iv = get_iv (data, expr);
1846   if (!iv || integer_zerop (iv->step))
1847     return NULL;
1848   else if (iv->biv_p)
1849     return iv;
1850
1851   stmt = SSA_NAME_DEF_STMT (expr);
1852   if (gphi *phi = dyn_cast <gphi *> (stmt))
1853     {
1854       ssa_op_iter iter;
1855       use_operand_p use_p;
1856       basic_block phi_bb = gimple_bb (phi);
1857
1858       /* Skip loop header PHI that doesn't define biv.  */
1859       if (phi_bb->loop_father == data->current_loop)
1860         return NULL;
1861
1862       if (virtual_operand_p (gimple_phi_result (phi)))
1863         return NULL;
1864
1865       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1866         {
1867           tree use = USE_FROM_PTR (use_p);
1868           iv = find_deriving_biv_for_expr (data, use);
1869           if (iv)
1870             return iv;
1871         }
1872       return NULL;
1873     }
1874   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1875     return NULL;
1876
1877   e1 = gimple_assign_rhs1 (stmt);
1878   code = gimple_assign_rhs_code (stmt);
1879   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1880     return find_deriving_biv_for_expr (data, e1);
1881
1882   switch (code)
1883     {
1884     case MULT_EXPR:
1885     case PLUS_EXPR:
1886     case MINUS_EXPR:
1887     case POINTER_PLUS_EXPR:
1888       /* Increments, decrements and multiplications by a constant
1889          are simple.  */
1890       e2 = gimple_assign_rhs2 (stmt);
1891       iv = find_deriving_biv_for_expr (data, e2);
1892       if (iv)
1893         return iv;
1894       gcc_fallthrough ();
1895
1896     CASE_CONVERT:
1897       /* Casts are simple.  */
1898       return find_deriving_biv_for_expr (data, e1);
1899
1900     default:
1901       break;
1902     }
1903
1904   return NULL;
1905 }
1906
1907 /* Record BIV, its predecessor and successor that they are used in
1908    address type uses.  */
1909
1910 static void
1911 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1912 {
1913   unsigned i;
1914   tree type, base_1, base_2;
1915   bitmap_iterator bi;
1916
1917   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1918       || biv->have_address_use || !biv->no_overflow)
1919     return;
1920
1921   type = TREE_TYPE (biv->base);
1922   if (!INTEGRAL_TYPE_P (type))
1923     return;
1924
1925   biv->have_address_use = true;
1926   data->bivs_not_used_in_addr--;
1927   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1928   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1929     {
1930       struct iv *iv = ver_info (data, i)->iv;
1931
1932       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1933           || iv->have_address_use || !iv->no_overflow)
1934         continue;
1935
1936       if (type != TREE_TYPE (iv->base)
1937           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1938         continue;
1939
1940       if (!operand_equal_p (biv->step, iv->step, 0))
1941         continue;
1942
1943       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1944       if (operand_equal_p (base_1, iv->base, 0)
1945           || operand_equal_p (base_2, biv->base, 0))
1946         {
1947           iv->have_address_use = true;
1948           data->bivs_not_used_in_addr--;
1949         }
1950     }
1951 }
1952
1953 /* Cumulates the steps of indices into DATA and replaces their values with the
1954    initial ones.  Returns false when the value of the index cannot be determined.
1955    Callback for for_each_index.  */
1956
1957 struct ifs_ivopts_data
1958 {
1959   struct ivopts_data *ivopts_data;
1960   gimple *stmt;
1961   tree step;
1962 };
1963
1964 static bool
1965 idx_find_step (tree base, tree *idx, void *data)
1966 {
1967   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1968   struct iv *iv;
1969   bool use_overflow_semantics = false;
1970   tree step, iv_base, iv_step, lbound, off;
1971   struct loop *loop = dta->ivopts_data->current_loop;
1972
1973   /* If base is a component ref, require that the offset of the reference
1974      be invariant.  */
1975   if (TREE_CODE (base) == COMPONENT_REF)
1976     {
1977       off = component_ref_field_offset (base);
1978       return expr_invariant_in_loop_p (loop, off);
1979     }
1980
1981   /* If base is array, first check whether we will be able to move the
1982      reference out of the loop (in order to take its address in strength
1983      reduction).  In order for this to work we need both lower bound
1984      and step to be loop invariants.  */
1985   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1986     {
1987       /* Moreover, for a range, the size needs to be invariant as well.  */
1988       if (TREE_CODE (base) == ARRAY_RANGE_REF
1989           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1990         return false;
1991
1992       step = array_ref_element_size (base);
1993       lbound = array_ref_low_bound (base);
1994
1995       if (!expr_invariant_in_loop_p (loop, step)
1996           || !expr_invariant_in_loop_p (loop, lbound))
1997         return false;
1998     }
1999
2000   if (TREE_CODE (*idx) != SSA_NAME)
2001     return true;
2002
2003   iv = get_iv (dta->ivopts_data, *idx);
2004   if (!iv)
2005     return false;
2006
2007   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2008           *&x[0], which is not folded and does not trigger the
2009           ARRAY_REF path below.  */
2010   *idx = iv->base;
2011
2012   if (integer_zerop (iv->step))
2013     return true;
2014
2015   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2016     {
2017       step = array_ref_element_size (base);
2018
2019       /* We only handle addresses whose step is an integer constant.  */
2020       if (TREE_CODE (step) != INTEGER_CST)
2021         return false;
2022     }
2023   else
2024     /* The step for pointer arithmetics already is 1 byte.  */
2025     step = size_one_node;
2026
2027   iv_base = iv->base;
2028   iv_step = iv->step;
2029   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2030     use_overflow_semantics = true;
2031
2032   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2033                             sizetype, &iv_base, &iv_step, dta->stmt,
2034                             use_overflow_semantics))
2035     {
2036       /* The index might wrap.  */
2037       return false;
2038     }
2039
2040   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2041   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2042
2043   if (dta->ivopts_data->bivs_not_used_in_addr)
2044     {
2045       if (!iv->biv_p)
2046         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2047
2048       record_biv_for_address_use (dta->ivopts_data, iv);
2049     }
2050   return true;
2051 }
2052
2053 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2054    object is passed to it in DATA.  */
2055
2056 static bool
2057 idx_record_use (tree base, tree *idx,
2058                 void *vdata)
2059 {
2060   struct ivopts_data *data = (struct ivopts_data *) vdata;
2061   find_interesting_uses_op (data, *idx);
2062   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2063     {
2064       find_interesting_uses_op (data, array_ref_element_size (base));
2065       find_interesting_uses_op (data, array_ref_low_bound (base));
2066     }
2067   return true;
2068 }
2069
2070 /* If we can prove that TOP = cst * BOT for some constant cst,
2071    store cst to MUL and return true.  Otherwise return false.
2072    The returned value is always sign-extended, regardless of the
2073    signedness of TOP and BOT.  */
2074
2075 static bool
2076 constant_multiple_of (tree top, tree bot, widest_int *mul)
2077 {
2078   tree mby;
2079   enum tree_code code;
2080   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2081   widest_int res, p0, p1;
2082
2083   STRIP_NOPS (top);
2084   STRIP_NOPS (bot);
2085
2086   if (operand_equal_p (top, bot, 0))
2087     {
2088       *mul = 1;
2089       return true;
2090     }
2091
2092   code = TREE_CODE (top);
2093   switch (code)
2094     {
2095     case MULT_EXPR:
2096       mby = TREE_OPERAND (top, 1);
2097       if (TREE_CODE (mby) != INTEGER_CST)
2098         return false;
2099
2100       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2101         return false;
2102
2103       *mul = wi::sext (res * wi::to_widest (mby), precision);
2104       return true;
2105
2106     case PLUS_EXPR:
2107     case MINUS_EXPR:
2108       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2109           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2110         return false;
2111
2112       if (code == MINUS_EXPR)
2113         p1 = -p1;
2114       *mul = wi::sext (p0 + p1, precision);
2115       return true;
2116
2117     case INTEGER_CST:
2118       if (TREE_CODE (bot) != INTEGER_CST)
2119         return false;
2120
2121       p0 = widest_int::from (top, SIGNED);
2122       p1 = widest_int::from (bot, SIGNED);
2123       if (p1 == 0)
2124         return false;
2125       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2126       return res == 0;
2127
2128     default:
2129       return false;
2130     }
2131 }
2132
2133 /* Return true if memory reference REF with step STEP may be unaligned.  */
2134
2135 static bool
2136 may_be_unaligned_p (tree ref, tree step)
2137 {
2138   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2139      thus they are not misaligned.  */
2140   if (TREE_CODE (ref) == TARGET_MEM_REF)
2141     return false;
2142
2143   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2144   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2145     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2146
2147   unsigned HOST_WIDE_INT bitpos;
2148   unsigned int ref_align;
2149   get_object_alignment_1 (ref, &ref_align, &bitpos);
2150   if (ref_align < align
2151       || (bitpos % align) != 0
2152       || (bitpos % BITS_PER_UNIT) != 0)
2153     return true;
2154
2155   unsigned int trailing_zeros = tree_ctz (step);
2156   if (trailing_zeros < HOST_BITS_PER_INT
2157       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2158     return true;
2159
2160   return false;
2161 }
2162
2163 /* Return true if EXPR may be non-addressable.   */
2164
2165 bool
2166 may_be_nonaddressable_p (tree expr)
2167 {
2168   switch (TREE_CODE (expr))
2169     {
2170     case TARGET_MEM_REF:
2171       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2172          target, thus they are always addressable.  */
2173       return false;
2174
2175     case MEM_REF:
2176       /* Likewise for MEM_REFs, modulo the storage order.  */
2177       return REF_REVERSE_STORAGE_ORDER (expr);
2178
2179     case BIT_FIELD_REF:
2180       if (REF_REVERSE_STORAGE_ORDER (expr))
2181         return true;
2182       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2183
2184     case COMPONENT_REF:
2185       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2186         return true;
2187       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2188              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2189
2190     case ARRAY_REF:
2191     case ARRAY_RANGE_REF:
2192       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2193         return true;
2194       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2195
2196     case VIEW_CONVERT_EXPR:
2197       /* This kind of view-conversions may wrap non-addressable objects
2198          and make them look addressable.  After some processing the
2199          non-addressability may be uncovered again, causing ADDR_EXPRs
2200          of inappropriate objects to be built.  */
2201       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2202           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2203         return true;
2204       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2205
2206     CASE_CONVERT:
2207       return true;
2208
2209     default:
2210       break;
2211     }
2212
2213   return false;
2214 }
2215
2216 /* Finds addresses in *OP_P inside STMT.  */
2217
2218 static void
2219 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2220                                tree *op_p)
2221 {
2222   tree base = *op_p, step = size_zero_node;
2223   struct iv *civ;
2224   struct ifs_ivopts_data ifs_ivopts_data;
2225
2226   /* Do not play with volatile memory references.  A bit too conservative,
2227      perhaps, but safe.  */
2228   if (gimple_has_volatile_ops (stmt))
2229     goto fail;
2230
2231   /* Ignore bitfields for now.  Not really something terribly complicated
2232      to handle.  TODO.  */
2233   if (TREE_CODE (base) == BIT_FIELD_REF)
2234     goto fail;
2235
2236   base = unshare_expr (base);
2237
2238   if (TREE_CODE (base) == TARGET_MEM_REF)
2239     {
2240       tree type = build_pointer_type (TREE_TYPE (base));
2241       tree astep;
2242
2243       if (TMR_BASE (base)
2244           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2245         {
2246           civ = get_iv (data, TMR_BASE (base));
2247           if (!civ)
2248             goto fail;
2249
2250           TMR_BASE (base) = civ->base;
2251           step = civ->step;
2252         }
2253       if (TMR_INDEX2 (base)
2254           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2255         {
2256           civ = get_iv (data, TMR_INDEX2 (base));
2257           if (!civ)
2258             goto fail;
2259
2260           TMR_INDEX2 (base) = civ->base;
2261           step = civ->step;
2262         }
2263       if (TMR_INDEX (base)
2264           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2265         {
2266           civ = get_iv (data, TMR_INDEX (base));
2267           if (!civ)
2268             goto fail;
2269
2270           TMR_INDEX (base) = civ->base;
2271           astep = civ->step;
2272
2273           if (astep)
2274             {
2275               if (TMR_STEP (base))
2276                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2277
2278               step = fold_build2 (PLUS_EXPR, type, step, astep);
2279             }
2280         }
2281
2282       if (integer_zerop (step))
2283         goto fail;
2284       base = tree_mem_ref_addr (type, base);
2285     }
2286   else
2287     {
2288       ifs_ivopts_data.ivopts_data = data;
2289       ifs_ivopts_data.stmt = stmt;
2290       ifs_ivopts_data.step = size_zero_node;
2291       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2292           || integer_zerop (ifs_ivopts_data.step))
2293         goto fail;
2294       step = ifs_ivopts_data.step;
2295
2296       /* Check that the base expression is addressable.  This needs
2297          to be done after substituting bases of IVs into it.  */
2298       if (may_be_nonaddressable_p (base))
2299         goto fail;
2300
2301       /* Moreover, on strict alignment platforms, check that it is
2302          sufficiently aligned.  */
2303       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2304         goto fail;
2305
2306       base = build_fold_addr_expr (base);
2307
2308       /* Substituting bases of IVs into the base expression might
2309          have caused folding opportunities.  */
2310       if (TREE_CODE (base) == ADDR_EXPR)
2311         {
2312           tree *ref = &TREE_OPERAND (base, 0);
2313           while (handled_component_p (*ref))
2314             ref = &TREE_OPERAND (*ref, 0);
2315           if (TREE_CODE (*ref) == MEM_REF)
2316             {
2317               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2318                                       TREE_OPERAND (*ref, 0),
2319                                       TREE_OPERAND (*ref, 1));
2320               if (tem)
2321                 *ref = tem;
2322             }
2323         }
2324     }
2325
2326   civ = alloc_iv (data, base, step);
2327   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2328   return;
2329
2330 fail:
2331   for_each_index (op_p, idx_record_use, data);
2332 }
2333
2334 /* Finds and records invariants used in STMT.  */
2335
2336 static void
2337 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2338 {
2339   ssa_op_iter iter;
2340   use_operand_p use_p;
2341   tree op;
2342
2343   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2344     {
2345       op = USE_FROM_PTR (use_p);
2346       record_invariant (data, op, false);
2347     }
2348 }
2349
2350 /* Finds interesting uses of induction variables in the statement STMT.  */
2351
2352 static void
2353 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2354 {
2355   struct iv *iv;
2356   tree op, *lhs, *rhs;
2357   ssa_op_iter iter;
2358   use_operand_p use_p;
2359   enum tree_code code;
2360
2361   find_invariants_stmt (data, stmt);
2362
2363   if (gimple_code (stmt) == GIMPLE_COND)
2364     {
2365       find_interesting_uses_cond (data, stmt);
2366       return;
2367     }
2368
2369   if (is_gimple_assign (stmt))
2370     {
2371       lhs = gimple_assign_lhs_ptr (stmt);
2372       rhs = gimple_assign_rhs1_ptr (stmt);
2373
2374       if (TREE_CODE (*lhs) == SSA_NAME)
2375         {
2376           /* If the statement defines an induction variable, the uses are not
2377              interesting by themselves.  */
2378
2379           iv = get_iv (data, *lhs);
2380
2381           if (iv && !integer_zerop (iv->step))
2382             return;
2383         }
2384
2385       code = gimple_assign_rhs_code (stmt);
2386       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2387           && (REFERENCE_CLASS_P (*rhs)
2388               || is_gimple_val (*rhs)))
2389         {
2390           if (REFERENCE_CLASS_P (*rhs))
2391             find_interesting_uses_address (data, stmt, rhs);
2392           else
2393             find_interesting_uses_op (data, *rhs);
2394
2395           if (REFERENCE_CLASS_P (*lhs))
2396             find_interesting_uses_address (data, stmt, lhs);
2397           return;
2398         }
2399       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2400         {
2401           find_interesting_uses_cond (data, stmt);
2402           return;
2403         }
2404
2405       /* TODO -- we should also handle address uses of type
2406
2407          memory = call (whatever);
2408
2409          and
2410
2411          call (memory).  */
2412     }
2413
2414   if (gimple_code (stmt) == GIMPLE_PHI
2415       && gimple_bb (stmt) == data->current_loop->header)
2416     {
2417       iv = get_iv (data, PHI_RESULT (stmt));
2418
2419       if (iv && !integer_zerop (iv->step))
2420         return;
2421     }
2422
2423   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2424     {
2425       op = USE_FROM_PTR (use_p);
2426
2427       if (TREE_CODE (op) != SSA_NAME)
2428         continue;
2429
2430       iv = get_iv (data, op);
2431       if (!iv)
2432         continue;
2433
2434       find_interesting_uses_op (data, op);
2435     }
2436 }
2437
2438 /* Finds interesting uses of induction variables outside of loops
2439    on loop exit edge EXIT.  */
2440
2441 static void
2442 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2443 {
2444   gphi *phi;
2445   gphi_iterator psi;
2446   tree def;
2447
2448   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2449     {
2450       phi = psi.phi ();
2451       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2452       if (!virtual_operand_p (def))
2453         find_interesting_uses_op (data, def);
2454     }
2455 }
2456
2457 /* Compute maximum offset of [base + offset] addressing mode
2458    for memory reference represented by USE.  */
2459
2460 static HOST_WIDE_INT
2461 compute_max_addr_offset (struct iv_use *use)
2462 {
2463   int width;
2464   rtx reg, addr;
2465   HOST_WIDE_INT i, off;
2466   unsigned list_index, num;
2467   addr_space_t as;
2468   machine_mode mem_mode, addr_mode;
2469   static vec<HOST_WIDE_INT> max_offset_list;
2470
2471   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2472   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2473
2474   num = max_offset_list.length ();
2475   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2476   if (list_index >= num)
2477     {
2478       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2479       for (; num < max_offset_list.length (); num++)
2480         max_offset_list[num] = -1;
2481     }
2482
2483   off = max_offset_list[list_index];
2484   if (off != -1)
2485     return off;
2486
2487   addr_mode = targetm.addr_space.address_mode (as);
2488   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2489   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2490
2491   width = GET_MODE_BITSIZE (addr_mode) - 1;
2492   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2493     width = HOST_BITS_PER_WIDE_INT - 1;
2494
2495   for (i = width; i > 0; i--)
2496     {
2497       off = (HOST_WIDE_INT_1U << i) - 1;
2498       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2499       if (memory_address_addr_space_p (mem_mode, addr, as))
2500         break;
2501
2502       /* For some strict-alignment targets, the offset must be naturally
2503          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2504       off = (HOST_WIDE_INT_1U << i);
2505       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2506         {
2507           off -= GET_MODE_SIZE (mem_mode);
2508           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2509           if (memory_address_addr_space_p (mem_mode, addr, as))
2510             break;
2511         }
2512     }
2513   if (i == 0)
2514     off = 0;
2515
2516   max_offset_list[list_index] = off;
2517   return off;
2518 }
2519
2520 /* Comparison function to sort group in ascending order of addr_offset.  */
2521
2522 static int
2523 group_compare_offset (const void *a, const void *b)
2524 {
2525   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2526   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2527
2528   if ((*u1)->addr_offset != (*u2)->addr_offset)
2529     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2530   else
2531     return 0;
2532 }
2533
2534 /* Check if small groups should be split.  Return true if no group
2535    contains more than two uses with distinct addr_offsets.  Return
2536    false otherwise.  We want to split such groups because:
2537
2538      1) Small groups don't have much benefit and may interfer with
2539         general candidate selection.
2540      2) Size for problem with only small groups is usually small and
2541         general algorithm can handle it well.
2542
2543    TODO -- Above claim may not hold when we want to merge memory
2544    accesses with conseuctive addresses.  */
2545
2546 static bool
2547 split_small_address_groups_p (struct ivopts_data *data)
2548 {
2549   unsigned int i, j, distinct = 1;
2550   struct iv_use *pre;
2551   struct iv_group *group;
2552
2553   for (i = 0; i < data->vgroups.length (); i++)
2554     {
2555       group = data->vgroups[i];
2556       if (group->vuses.length () == 1)
2557         continue;
2558
2559       gcc_assert (group->type == USE_ADDRESS);
2560       if (group->vuses.length () == 2)
2561         {
2562           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2563             std::swap (group->vuses[0], group->vuses[1]);
2564         }
2565       else
2566         group->vuses.qsort (group_compare_offset);
2567
2568       if (distinct > 2)
2569         continue;
2570
2571       distinct = 1;
2572       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2573         {
2574           if (group->vuses[j]->addr_offset != pre->addr_offset)
2575             {
2576               pre = group->vuses[j];
2577               distinct++;
2578             }
2579
2580           if (distinct > 2)
2581             break;
2582         }
2583     }
2584
2585   return (distinct <= 2);
2586 }
2587
2588 /* For each group of address type uses, this function further groups
2589    these uses according to the maximum offset supported by target's
2590    [base + offset] addressing mode.  */
2591
2592 static void
2593 split_address_groups (struct ivopts_data *data)
2594 {
2595   unsigned int i, j;
2596   HOST_WIDE_INT max_offset = -1;
2597
2598   /* Reset max offset to split all small groups.  */
2599   if (split_small_address_groups_p (data))
2600     max_offset = 0;
2601
2602   for (i = 0; i < data->vgroups.length (); i++)
2603     {
2604       struct iv_group *group = data->vgroups[i];
2605       struct iv_use *use = group->vuses[0];
2606
2607       use->id = 0;
2608       use->group_id = group->id;
2609       if (group->vuses.length () == 1)
2610         continue;
2611
2612       if (max_offset != 0)
2613         max_offset = compute_max_addr_offset (use);
2614
2615       for (j = 1; j < group->vuses.length (); j++)
2616         {
2617           struct iv_use *next = group->vuses[j];
2618
2619           /* Only uses with offset that can fit in offset part against
2620              the first use can be grouped together.  */
2621           if (next->addr_offset - use->addr_offset
2622               > (unsigned HOST_WIDE_INT) max_offset)
2623             break;
2624
2625           next->id = j;
2626           next->group_id = group->id;
2627         }
2628       /* Split group.  */
2629       if (j < group->vuses.length ())
2630         {
2631           struct iv_group *new_group = record_group (data, group->type);
2632           new_group->vuses.safe_splice (group->vuses);
2633           new_group->vuses.block_remove (0, j);
2634           group->vuses.truncate (j);
2635         }
2636     }
2637 }
2638
2639 /* Finds uses of the induction variables that are interesting.  */
2640
2641 static void
2642 find_interesting_uses (struct ivopts_data *data)
2643 {
2644   basic_block bb;
2645   gimple_stmt_iterator bsi;
2646   basic_block *body = get_loop_body (data->current_loop);
2647   unsigned i;
2648   edge e;
2649
2650   for (i = 0; i < data->current_loop->num_nodes; i++)
2651     {
2652       edge_iterator ei;
2653       bb = body[i];
2654
2655       FOR_EACH_EDGE (e, ei, bb->succs)
2656         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2657             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2658           find_interesting_uses_outside (data, e);
2659
2660       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2661         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2662       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2663         if (!is_gimple_debug (gsi_stmt (bsi)))
2664           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2665     }
2666
2667   split_address_groups (data);
2668
2669   if (dump_file && (dump_flags & TDF_DETAILS))
2670     {
2671       bitmap_iterator bi;
2672
2673       fprintf (dump_file, "\n<Invariant Vars>:\n");
2674       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2675         {
2676           struct version_info *info = ver_info (data, i);
2677           if (info->inv_id)
2678             {
2679               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2680               print_generic_expr (dump_file, info->name, TDF_SLIM);
2681               fprintf (dump_file, "%s\n",
2682                        info->has_nonlin_use ? "" : "\t(eliminable)");
2683             }
2684         }
2685
2686       fprintf (dump_file, "\n<IV Groups>:\n");
2687       dump_groups (dump_file, data);
2688       fprintf (dump_file, "\n");
2689     }
2690
2691   free (body);
2692 }
2693
2694 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2695    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2696    we are at the top-level of the processed address.  */
2697
2698 static tree
2699 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2700                 HOST_WIDE_INT *offset)
2701 {
2702   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2703   enum tree_code code;
2704   tree type, orig_type = TREE_TYPE (expr);
2705   HOST_WIDE_INT off0, off1, st;
2706   tree orig_expr = expr;
2707
2708   STRIP_NOPS (expr);
2709
2710   type = TREE_TYPE (expr);
2711   code = TREE_CODE (expr);
2712   *offset = 0;
2713
2714   switch (code)
2715     {
2716     case INTEGER_CST:
2717       if (!cst_and_fits_in_hwi (expr)
2718           || integer_zerop (expr))
2719         return orig_expr;
2720
2721       *offset = int_cst_value (expr);
2722       return build_int_cst (orig_type, 0);
2723
2724     case POINTER_PLUS_EXPR:
2725     case PLUS_EXPR:
2726     case MINUS_EXPR:
2727       op0 = TREE_OPERAND (expr, 0);
2728       op1 = TREE_OPERAND (expr, 1);
2729
2730       op0 = strip_offset_1 (op0, false, false, &off0);
2731       op1 = strip_offset_1 (op1, false, false, &off1);
2732
2733       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2734       if (op0 == TREE_OPERAND (expr, 0)
2735           && op1 == TREE_OPERAND (expr, 1))
2736         return orig_expr;
2737
2738       if (integer_zerop (op1))
2739         expr = op0;
2740       else if (integer_zerop (op0))
2741         {
2742           if (code == MINUS_EXPR)
2743             expr = fold_build1 (NEGATE_EXPR, type, op1);
2744           else
2745             expr = op1;
2746         }
2747       else
2748         expr = fold_build2 (code, type, op0, op1);
2749
2750       return fold_convert (orig_type, expr);
2751
2752     case MULT_EXPR:
2753       op1 = TREE_OPERAND (expr, 1);
2754       if (!cst_and_fits_in_hwi (op1))
2755         return orig_expr;
2756
2757       op0 = TREE_OPERAND (expr, 0);
2758       op0 = strip_offset_1 (op0, false, false, &off0);
2759       if (op0 == TREE_OPERAND (expr, 0))
2760         return orig_expr;
2761
2762       *offset = off0 * int_cst_value (op1);
2763       if (integer_zerop (op0))
2764         expr = op0;
2765       else
2766         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2767
2768       return fold_convert (orig_type, expr);
2769
2770     case ARRAY_REF:
2771     case ARRAY_RANGE_REF:
2772       if (!inside_addr)
2773         return orig_expr;
2774
2775       step = array_ref_element_size (expr);
2776       if (!cst_and_fits_in_hwi (step))
2777         break;
2778
2779       st = int_cst_value (step);
2780       op1 = TREE_OPERAND (expr, 1);
2781       op1 = strip_offset_1 (op1, false, false, &off1);
2782       *offset = off1 * st;
2783
2784       if (top_compref
2785           && integer_zerop (op1))
2786         {
2787           /* Strip the component reference completely.  */
2788           op0 = TREE_OPERAND (expr, 0);
2789           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2790           *offset += off0;
2791           return op0;
2792         }
2793       break;
2794
2795     case COMPONENT_REF:
2796       {
2797         tree field;
2798
2799         if (!inside_addr)
2800           return orig_expr;
2801
2802         tmp = component_ref_field_offset (expr);
2803         field = TREE_OPERAND (expr, 1);
2804         if (top_compref
2805             && cst_and_fits_in_hwi (tmp)
2806             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2807           {
2808             HOST_WIDE_INT boffset, abs_off;
2809
2810             /* Strip the component reference completely.  */
2811             op0 = TREE_OPERAND (expr, 0);
2812             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2813             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2814             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2815             if (boffset < 0)
2816               abs_off = -abs_off;
2817
2818             *offset = off0 + int_cst_value (tmp) + abs_off;
2819             return op0;
2820           }
2821       }
2822       break;
2823
2824     case ADDR_EXPR:
2825       op0 = TREE_OPERAND (expr, 0);
2826       op0 = strip_offset_1 (op0, true, true, &off0);
2827       *offset += off0;
2828
2829       if (op0 == TREE_OPERAND (expr, 0))
2830         return orig_expr;
2831
2832       expr = build_fold_addr_expr (op0);
2833       return fold_convert (orig_type, expr);
2834
2835     case MEM_REF:
2836       /* ???  Offset operand?  */
2837       inside_addr = false;
2838       break;
2839
2840     default:
2841       return orig_expr;
2842     }
2843
2844   /* Default handling of expressions for that we want to recurse into
2845      the first operand.  */
2846   op0 = TREE_OPERAND (expr, 0);
2847   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2848   *offset += off0;
2849
2850   if (op0 == TREE_OPERAND (expr, 0)
2851       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2852     return orig_expr;
2853
2854   expr = copy_node (expr);
2855   TREE_OPERAND (expr, 0) = op0;
2856   if (op1)
2857     TREE_OPERAND (expr, 1) = op1;
2858
2859   /* Inside address, we might strip the top level component references,
2860      thus changing type of the expression.  Handling of ADDR_EXPR
2861      will fix that.  */
2862   expr = fold_convert (orig_type, expr);
2863
2864   return expr;
2865 }
2866
2867 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2868
2869 static tree
2870 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2871 {
2872   HOST_WIDE_INT off;
2873   tree core = strip_offset_1 (expr, false, false, &off);
2874   *offset = off;
2875   return core;
2876 }
2877
2878 /* Returns variant of TYPE that can be used as base for different uses.
2879    We return unsigned type with the same precision, which avoids problems
2880    with overflows.  */
2881
2882 static tree
2883 generic_type_for (tree type)
2884 {
2885   if (POINTER_TYPE_P (type))
2886     return unsigned_type_for (type);
2887
2888   if (TYPE_UNSIGNED (type))
2889     return type;
2890
2891   return unsigned_type_for (type);
2892 }
2893
2894 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2895    the bitmap to that we should store it.  */
2896
2897 static struct ivopts_data *fd_ivopts_data;
2898 static tree
2899 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2900 {
2901   bitmap *depends_on = (bitmap *) data;
2902   struct version_info *info;
2903
2904   if (TREE_CODE (*expr_p) != SSA_NAME)
2905     return NULL_TREE;
2906   info = name_info (fd_ivopts_data, *expr_p);
2907
2908   if (!info->inv_id || info->has_nonlin_use)
2909     return NULL_TREE;
2910
2911   if (!*depends_on)
2912     *depends_on = BITMAP_ALLOC (NULL);
2913   bitmap_set_bit (*depends_on, info->inv_id);
2914
2915   return NULL_TREE;
2916 }
2917
2918 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2919    position to POS.  If USE is not NULL, the candidate is set as related to
2920    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2921    replacement of the final value of the iv by a direct computation.  */
2922
2923 static struct iv_cand *
2924 add_candidate_1 (struct ivopts_data *data,
2925                  tree base, tree step, bool important, enum iv_position pos,
2926                  struct iv_use *use, gimple *incremented_at,
2927                  struct iv *orig_iv = NULL)
2928 {
2929   unsigned i;
2930   struct iv_cand *cand = NULL;
2931   tree type, orig_type;
2932
2933   gcc_assert (base && step);
2934
2935   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2936      live, but the ivopts code may replace a real pointer with one
2937      pointing before or after the memory block that is then adjusted
2938      into the memory block during the loop.  FIXME: It would likely be
2939      better to actually force the pointer live and still use ivopts;
2940      for example, it would be enough to write the pointer into memory
2941      and keep it there until after the loop.  */
2942   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2943     return NULL;
2944
2945   /* For non-original variables, make sure their values are computed in a type
2946      that does not invoke undefined behavior on overflows (since in general,
2947      we cannot prove that these induction variables are non-wrapping).  */
2948   if (pos != IP_ORIGINAL)
2949     {
2950       orig_type = TREE_TYPE (base);
2951       type = generic_type_for (orig_type);
2952       if (type != orig_type)
2953         {
2954           base = fold_convert (type, base);
2955           step = fold_convert (type, step);
2956         }
2957     }
2958
2959   for (i = 0; i < data->vcands.length (); i++)
2960     {
2961       cand = data->vcands[i];
2962
2963       if (cand->pos != pos)
2964         continue;
2965
2966       if (cand->incremented_at != incremented_at
2967           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2968               && cand->ainc_use != use))
2969         continue;
2970
2971       if (operand_equal_p (base, cand->iv->base, 0)
2972           && operand_equal_p (step, cand->iv->step, 0)
2973           && (TYPE_PRECISION (TREE_TYPE (base))
2974               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2975         break;
2976     }
2977
2978   if (i == data->vcands.length ())
2979     {
2980       cand = XCNEW (struct iv_cand);
2981       cand->id = i;
2982       cand->iv = alloc_iv (data, base, step);
2983       cand->pos = pos;
2984       if (pos != IP_ORIGINAL)
2985         {
2986           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2987           cand->var_after = cand->var_before;
2988         }
2989       cand->important = important;
2990       cand->incremented_at = incremented_at;
2991       data->vcands.safe_push (cand);
2992
2993       if (TREE_CODE (step) != INTEGER_CST)
2994         {
2995           fd_ivopts_data = data;
2996           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2997         }
2998
2999       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3000         cand->ainc_use = use;
3001       else
3002         cand->ainc_use = NULL;
3003
3004       cand->orig_iv = orig_iv;
3005       if (dump_file && (dump_flags & TDF_DETAILS))
3006         dump_cand (dump_file, cand);
3007     }
3008
3009   cand->important |= important;
3010
3011   /* Relate candidate to the group for which it is added.  */
3012   if (use)
3013     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3014
3015   return cand;
3016 }
3017
3018 /* Returns true if incrementing the induction variable at the end of the LOOP
3019    is allowed.
3020
3021    The purpose is to avoid splitting latch edge with a biv increment, thus
3022    creating a jump, possibly confusing other optimization passes and leaving
3023    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3024    is not available (so we do not have a better alternative), or if the latch
3025    edge is already nonempty.  */
3026
3027 static bool
3028 allow_ip_end_pos_p (struct loop *loop)
3029 {
3030   if (!ip_normal_pos (loop))
3031     return true;
3032
3033   if (!empty_block_p (ip_end_pos (loop)))
3034     return true;
3035
3036   return false;
3037 }
3038
3039 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3040    Important field is set to IMPORTANT.  */
3041
3042 static void
3043 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3044                         bool important, struct iv_use *use)
3045 {
3046   basic_block use_bb = gimple_bb (use->stmt);
3047   machine_mode mem_mode;
3048   unsigned HOST_WIDE_INT cstepi;
3049
3050   /* If we insert the increment in any position other than the standard
3051      ones, we must ensure that it is incremented once per iteration.
3052      It must not be in an inner nested loop, or one side of an if
3053      statement.  */
3054   if (use_bb->loop_father != data->current_loop
3055       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3056       || stmt_could_throw_p (use->stmt)
3057       || !cst_and_fits_in_hwi (step))
3058     return;
3059
3060   cstepi = int_cst_value (step);
3061
3062   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3063   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3064         || USE_STORE_PRE_INCREMENT (mem_mode))
3065        && GET_MODE_SIZE (mem_mode) == cstepi)
3066       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3067            || USE_STORE_PRE_DECREMENT (mem_mode))
3068           && GET_MODE_SIZE (mem_mode) == -cstepi))
3069     {
3070       enum tree_code code = MINUS_EXPR;
3071       tree new_base;
3072       tree new_step = step;
3073
3074       if (POINTER_TYPE_P (TREE_TYPE (base)))
3075         {
3076           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3077           code = POINTER_PLUS_EXPR;
3078         }
3079       else
3080         new_step = fold_convert (TREE_TYPE (base), new_step);
3081       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3082       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3083                        use->stmt);
3084     }
3085   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3086         || USE_STORE_POST_INCREMENT (mem_mode))
3087        && GET_MODE_SIZE (mem_mode) == cstepi)
3088       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3089            || USE_STORE_POST_DECREMENT (mem_mode))
3090           && GET_MODE_SIZE (mem_mode) == -cstepi))
3091     {
3092       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3093                        use->stmt);
3094     }
3095 }
3096
3097 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3098    position to POS.  If USE is not NULL, the candidate is set as related to
3099    it.  The candidate computation is scheduled before exit condition and at
3100    the end of loop.  */
3101
3102 static void
3103 add_candidate (struct ivopts_data *data,
3104                tree base, tree step, bool important, struct iv_use *use,
3105                struct iv *orig_iv = NULL)
3106 {
3107   if (ip_normal_pos (data->current_loop))
3108     add_candidate_1 (data, base, step, important,
3109                      IP_NORMAL, use, NULL, orig_iv);
3110   if (ip_end_pos (data->current_loop)
3111       && allow_ip_end_pos_p (data->current_loop))
3112     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3113 }
3114
3115 /* Adds standard iv candidates.  */
3116
3117 static void
3118 add_standard_iv_candidates (struct ivopts_data *data)
3119 {
3120   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3121
3122   /* The same for a double-integer type if it is still fast enough.  */
3123   if (TYPE_PRECISION
3124         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3125       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3126     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3127                    build_int_cst (long_integer_type_node, 1), true, NULL);
3128
3129   /* The same for a double-integer type if it is still fast enough.  */
3130   if (TYPE_PRECISION
3131         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3132       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3133     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3134                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3135 }
3136
3137
3138 /* Adds candidates bases on the old induction variable IV.  */
3139
3140 static void
3141 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3142 {
3143   gimple *phi;
3144   tree def;
3145   struct iv_cand *cand;
3146
3147   /* Check if this biv is used in address type use.  */
3148   if (iv->no_overflow  && iv->have_address_use
3149       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3150       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3151     {
3152       tree base = fold_convert (sizetype, iv->base);
3153       tree step = fold_convert (sizetype, iv->step);
3154
3155       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3156       add_candidate (data, base, step, true, NULL, iv);
3157       /* Add iv cand of the original type only if it has nonlinear use.  */
3158       if (iv->nonlin_use)
3159         add_candidate (data, iv->base, iv->step, true, NULL);
3160     }
3161   else
3162     add_candidate (data, iv->base, iv->step, true, NULL);
3163
3164   /* The same, but with initial value zero.  */
3165   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3166     add_candidate (data, size_int (0), iv->step, true, NULL);
3167   else
3168     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3169                    iv->step, true, NULL);
3170
3171   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3172   if (gimple_code (phi) == GIMPLE_PHI)
3173     {
3174       /* Additionally record the possibility of leaving the original iv
3175          untouched.  */
3176       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3177       /* Don't add candidate if it's from another PHI node because
3178          it's an affine iv appearing in the form of PEELED_CHREC.  */
3179       phi = SSA_NAME_DEF_STMT (def);
3180       if (gimple_code (phi) != GIMPLE_PHI)
3181         {
3182           cand = add_candidate_1 (data,
3183                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3184                                   SSA_NAME_DEF_STMT (def));
3185           if (cand)
3186             {
3187               cand->var_before = iv->ssa_name;
3188               cand->var_after = def;
3189             }
3190         }
3191       else
3192         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3193     }
3194 }
3195
3196 /* Adds candidates based on the old induction variables.  */
3197
3198 static void
3199 add_iv_candidate_for_bivs (struct ivopts_data *data)
3200 {
3201   unsigned i;
3202   struct iv *iv;
3203   bitmap_iterator bi;
3204
3205   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3206     {
3207       iv = ver_info (data, i)->iv;
3208       if (iv && iv->biv_p && !integer_zerop (iv->step))
3209         add_iv_candidate_for_biv (data, iv);
3210     }
3211 }
3212
3213 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3214
3215 static void
3216 record_common_cand (struct ivopts_data *data, tree base,
3217                     tree step, struct iv_use *use)
3218 {
3219   struct iv_common_cand ent;
3220   struct iv_common_cand **slot;
3221
3222   ent.base = base;
3223   ent.step = step;
3224   ent.hash = iterative_hash_expr (base, 0);
3225   ent.hash = iterative_hash_expr (step, ent.hash);
3226
3227   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3228   if (*slot == NULL)
3229     {
3230       *slot = new iv_common_cand ();
3231       (*slot)->base = base;
3232       (*slot)->step = step;
3233       (*slot)->uses.create (8);
3234       (*slot)->hash = ent.hash;
3235       data->iv_common_cands.safe_push ((*slot));
3236     }
3237
3238   gcc_assert (use != NULL);
3239   (*slot)->uses.safe_push (use);
3240   return;
3241 }
3242
3243 /* Comparison function used to sort common candidates.  */
3244
3245 static int
3246 common_cand_cmp (const void *p1, const void *p2)
3247 {
3248   unsigned n1, n2;
3249   const struct iv_common_cand *const *const ccand1
3250     = (const struct iv_common_cand *const *)p1;
3251   const struct iv_common_cand *const *const ccand2
3252     = (const struct iv_common_cand *const *)p2;
3253
3254   n1 = (*ccand1)->uses.length ();
3255   n2 = (*ccand2)->uses.length ();
3256   return n2 - n1;
3257 }
3258
3259 /* Adds IV candidates based on common candidated recorded.  */
3260
3261 static void
3262 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3263 {
3264   unsigned i, j;
3265   struct iv_cand *cand_1, *cand_2;
3266
3267   data->iv_common_cands.qsort (common_cand_cmp);
3268   for (i = 0; i < data->iv_common_cands.length (); i++)
3269     {
3270       struct iv_common_cand *ptr = data->iv_common_cands[i];
3271
3272       /* Only add IV candidate if it's derived from multiple uses.  */
3273       if (ptr->uses.length () <= 1)
3274         break;
3275
3276       cand_1 = NULL;
3277       cand_2 = NULL;
3278       if (ip_normal_pos (data->current_loop))
3279         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3280                                   false, IP_NORMAL, NULL, NULL);
3281
3282       if (ip_end_pos (data->current_loop)
3283           && allow_ip_end_pos_p (data->current_loop))
3284         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3285                                   false, IP_END, NULL, NULL);
3286
3287       /* Bind deriving uses and the new candidates.  */
3288       for (j = 0; j < ptr->uses.length (); j++)
3289         {
3290           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3291           if (cand_1)
3292             bitmap_set_bit (group->related_cands, cand_1->id);
3293           if (cand_2)
3294             bitmap_set_bit (group->related_cands, cand_2->id);
3295         }
3296     }
3297
3298   /* Release data since it is useless from this point.  */
3299   data->iv_common_cand_tab->empty ();
3300   data->iv_common_cands.truncate (0);
3301 }
3302
3303 /* Adds candidates based on the value of USE's iv.  */
3304
3305 static void
3306 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3307 {
3308   unsigned HOST_WIDE_INT offset;
3309   tree base;
3310   tree basetype;
3311   struct iv *iv = use->iv;
3312
3313   add_candidate (data, iv->base, iv->step, false, use);
3314
3315   /* Record common candidate for use in case it can be shared by others.  */
3316   record_common_cand (data, iv->base, iv->step, use);
3317
3318   /* Record common candidate with initial value zero.  */
3319   basetype = TREE_TYPE (iv->base);
3320   if (POINTER_TYPE_P (basetype))
3321     basetype = sizetype;
3322   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3323
3324   /* Record common candidate with constant offset stripped in base.
3325      Like the use itself, we also add candidate directly for it.  */
3326   base = strip_offset (iv->base, &offset);
3327   if (offset || base != iv->base)
3328     {
3329       record_common_cand (data, base, iv->step, use);
3330       add_candidate (data, base, iv->step, false, use);
3331     }
3332
3333   /* Record common candidate with base_object removed in base.  */
3334   if (iv->base_object != NULL)
3335     {
3336       unsigned i;
3337       aff_tree aff_base;
3338       tree step, base_object = iv->base_object;
3339
3340       base = iv->base;
3341       step = iv->step;
3342       STRIP_NOPS (base);
3343       STRIP_NOPS (step);
3344       STRIP_NOPS (base_object);
3345       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3346       for (i = 0; i < aff_base.n; i++)
3347         {
3348           if (aff_base.elts[i].coef != 1)
3349             continue;
3350
3351           if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3352             break;
3353         }
3354       if (i < aff_base.n)
3355         {
3356           aff_combination_remove_elt (&aff_base, i);
3357           base = aff_combination_to_tree (&aff_base);
3358           basetype = TREE_TYPE (base);
3359           if (POINTER_TYPE_P (basetype))
3360             basetype = sizetype;
3361
3362           step = fold_convert (basetype, step);
3363           record_common_cand (data, base, step, use);
3364           /* Also record common candidate with offset stripped.  */
3365           base = strip_offset (base, &offset);
3366           if (offset)
3367             record_common_cand (data, base, step, use);
3368         }
3369     }
3370
3371   /* At last, add auto-incremental candidates.  Make such variables
3372      important since other iv uses with same base object may be based
3373      on it.  */
3374   if (use != NULL && use->type == USE_ADDRESS)
3375     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3376 }
3377
3378 /* Adds candidates based on the uses.  */
3379
3380 static void
3381 add_iv_candidate_for_groups (struct ivopts_data *data)
3382 {
3383   unsigned i;
3384
3385   /* Only add candidate for the first use in group.  */
3386   for (i = 0; i < data->vgroups.length (); i++)
3387     {
3388       struct iv_group *group = data->vgroups[i];
3389
3390       gcc_assert (group->vuses[0] != NULL);
3391       add_iv_candidate_for_use (data, group->vuses[0]);
3392     }
3393   add_iv_candidate_derived_from_uses (data);
3394 }
3395
3396 /* Record important candidates and add them to related_cands bitmaps.  */
3397
3398 static void
3399 record_important_candidates (struct ivopts_data *data)
3400 {
3401   unsigned i;
3402   struct iv_group *group;
3403
3404   for (i = 0; i < data->vcands.length (); i++)
3405     {
3406       struct iv_cand *cand = data->vcands[i];
3407
3408       if (cand->important)
3409         bitmap_set_bit (data->important_candidates, i);
3410     }
3411
3412   data->consider_all_candidates = (data->vcands.length ()
3413                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3414
3415   /* Add important candidates to groups' related_cands bitmaps.  */
3416   for (i = 0; i < data->vgroups.length (); i++)
3417     {
3418       group = data->vgroups[i];
3419       bitmap_ior_into (group->related_cands, data->important_candidates);
3420     }
3421 }
3422
3423 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3424    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3425    we allocate a simple list to every use.  */
3426
3427 static void
3428 alloc_use_cost_map (struct ivopts_data *data)
3429 {
3430   unsigned i, size, s;
3431
3432   for (i = 0; i < data->vgroups.length (); i++)
3433     {
3434       struct iv_group *group = data->vgroups[i];
3435
3436       if (data->consider_all_candidates)
3437         size = data->vcands.length ();
3438       else
3439         {
3440           s = bitmap_count_bits (group->related_cands);
3441
3442           /* Round up to the power of two, so that moduling by it is fast.  */
3443           size = s ? (1 << ceil_log2 (s)) : 1;
3444         }
3445
3446       group->n_map_members = size;
3447       group->cost_map = XCNEWVEC (struct cost_pair, size);
3448     }
3449 }
3450
3451 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3452    on invariants DEPENDS_ON and that the value used in expressing it
3453    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3454
3455 static void
3456 set_group_iv_cost (struct ivopts_data *data,
3457                    struct iv_group *group, struct iv_cand *cand,
3458                    comp_cost cost, bitmap depends_on, tree value,
3459                    enum tree_code comp, iv_inv_expr_ent *inv_expr)
3460 {
3461   unsigned i, s;
3462
3463   if (cost.infinite_cost_p ())
3464     {
3465       BITMAP_FREE (depends_on);
3466       return;
3467     }
3468
3469   if (data->consider_all_candidates)
3470     {
3471       group->cost_map[cand->id].cand = cand;
3472       group->cost_map[cand->id].cost = cost;
3473       group->cost_map[cand->id].depends_on = depends_on;
3474       group->cost_map[cand->id].value = value;
3475       group->cost_map[cand->id].comp = comp;
3476       group->cost_map[cand->id].inv_expr = inv_expr;
3477       return;
3478     }
3479
3480   /* n_map_members is a power of two, so this computes modulo.  */
3481   s = cand->id & (group->n_map_members - 1);
3482   for (i = s; i < group->n_map_members; i++)
3483     if (!group->cost_map[i].cand)
3484       goto found;
3485   for (i = 0; i < s; i++)
3486     if (!group->cost_map[i].cand)
3487       goto found;
3488
3489   gcc_unreachable ();
3490
3491 found:
3492   group->cost_map[i].cand = cand;
3493   group->cost_map[i].cost = cost;
3494   group->cost_map[i].depends_on = depends_on;
3495   group->cost_map[i].value = value;
3496   group->cost_map[i].comp = comp;
3497   group->cost_map[i].inv_expr = inv_expr;
3498 }
3499
3500 /* Gets cost of (GROUP, CAND) pair.  */
3501
3502 static struct cost_pair *
3503 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3504                    struct iv_cand *cand)
3505 {
3506   unsigned i, s;
3507   struct cost_pair *ret;
3508
3509   if (!cand)
3510     return NULL;
3511
3512   if (data->consider_all_candidates)
3513     {
3514       ret = group->cost_map + cand->id;
3515       if (!ret->cand)
3516         return NULL;
3517
3518       return ret;
3519     }
3520
3521   /* n_map_members is a power of two, so this computes modulo.  */
3522   s = cand->id & (group->n_map_members - 1);
3523   for (i = s; i < group->n_map_members; i++)
3524     if (group->cost_map[i].cand == cand)
3525       return group->cost_map + i;
3526     else if (group->cost_map[i].cand == NULL)
3527       return NULL;
3528   for (i = 0; i < s; i++)
3529     if (group->cost_map[i].cand == cand)
3530       return group->cost_map + i;
3531     else if (group->cost_map[i].cand == NULL)
3532       return NULL;
3533
3534   return NULL;
3535 }
3536
3537 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3538 static rtx
3539 produce_memory_decl_rtl (tree obj, int *regno)
3540 {
3541   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3542   machine_mode address_mode = targetm.addr_space.address_mode (as);
3543   rtx x;
3544
3545   gcc_assert (obj);
3546   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3547     {
3548       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3549       x = gen_rtx_SYMBOL_REF (address_mode, name);
3550       SET_SYMBOL_REF_DECL (x, obj);
3551       x = gen_rtx_MEM (DECL_MODE (obj), x);
3552       set_mem_addr_space (x, as);
3553       targetm.encode_section_info (obj, x, true);
3554     }
3555   else
3556     {
3557       x = gen_raw_REG (address_mode, (*regno)++);
3558       x = gen_rtx_MEM (DECL_MODE (obj), x);
3559       set_mem_addr_space (x, as);
3560     }
3561
3562   return x;
3563 }
3564
3565 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3566    walk_tree.  DATA contains the actual fake register number.  */
3567
3568 static tree
3569 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3570 {
3571   tree obj = NULL_TREE;
3572   rtx x = NULL_RTX;
3573   int *regno = (int *) data;
3574
3575   switch (TREE_CODE (*expr_p))
3576     {
3577     case ADDR_EXPR:
3578       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3579            handled_component_p (*expr_p);
3580            expr_p = &TREE_OPERAND (*expr_p, 0))
3581         continue;
3582       obj = *expr_p;
3583       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3584         x = produce_memory_decl_rtl (obj, regno);
3585       break;
3586
3587     case SSA_NAME:
3588       *ws = 0;
3589       obj = SSA_NAME_VAR (*expr_p);
3590       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3591       if (!obj)
3592         return NULL_TREE;
3593       if (!DECL_RTL_SET_P (obj))
3594         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3595       break;
3596
3597     case VAR_DECL:
3598     case PARM_DECL:
3599     case RESULT_DECL:
3600       *ws = 0;
3601       obj = *expr_p;
3602
3603       if (DECL_RTL_SET_P (obj))
3604         break;
3605
3606       if (DECL_MODE (obj) == BLKmode)
3607         x = produce_memory_decl_rtl (obj, regno);
3608       else
3609         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3610
3611       break;
3612
3613     default:
3614       break;
3615     }
3616
3617   if (x)
3618     {
3619       decl_rtl_to_reset.safe_push (obj);
3620       SET_DECL_RTL (obj, x);
3621     }
3622
3623   return NULL_TREE;
3624 }
3625
3626 /* Determines cost of the computation of EXPR.  */
3627
3628 static unsigned
3629 computation_cost (tree expr, bool speed)
3630 {
3631   rtx_insn *seq;
3632   rtx rslt;
3633   tree type = TREE_TYPE (expr);
3634   unsigned cost;
3635   /* Avoid using hard regs in ways which may be unsupported.  */
3636   int regno = LAST_VIRTUAL_REGISTER + 1;
3637   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3638   enum node_frequency real_frequency = node->frequency;
3639
3640   node->frequency = NODE_FREQUENCY_NORMAL;
3641   crtl->maybe_hot_insn_p = speed;
3642   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3643   start_sequence ();
3644   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3645   seq = get_insns ();
3646   end_sequence ();
3647   default_rtl_profile ();
3648   node->frequency = real_frequency;
3649
3650   cost = seq_cost (seq, speed);
3651   if (MEM_P (rslt))
3652     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3653                           TYPE_ADDR_SPACE (type), speed);
3654   else if (!REG_P (rslt))
3655     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3656
3657   return cost;
3658 }
3659
3660 /* Returns variable containing the value of candidate CAND at statement AT.  */
3661
3662 static tree
3663 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3664 {
3665   if (stmt_after_increment (loop, cand, stmt))
3666     return cand->var_after;
3667   else
3668     return cand->var_before;
3669 }
3670
3671 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3672    same precision that is at least as wide as the precision of TYPE, stores
3673    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3674    type of A and B.  */
3675
3676 static tree
3677 determine_common_wider_type (tree *a, tree *b)
3678 {
3679   tree wider_type = NULL;
3680   tree suba, subb;
3681   tree atype = TREE_TYPE (*a);
3682
3683   if (CONVERT_EXPR_P (*a))
3684     {
3685       suba = TREE_OPERAND (*a, 0);
3686       wider_type = TREE_TYPE (suba);
3687       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3688         return atype;
3689     }
3690   else
3691     return atype;
3692
3693   if (CONVERT_EXPR_P (*b))
3694     {
3695       subb = TREE_OPERAND (*b, 0);
3696       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3697         return atype;
3698     }
3699   else
3700     return atype;
3701
3702   *a = suba;
3703   *b = subb;
3704   return wider_type;
3705 }
3706
3707 /* Determines the expression by that USE is expressed from induction variable
3708    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3709    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3710
3711 static bool
3712 get_computation_aff (struct loop *loop,
3713                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3714                      struct aff_tree *aff)
3715 {
3716   tree ubase = use->iv->base;
3717   tree ustep = use->iv->step;
3718   tree cbase = cand->iv->base;
3719   tree cstep = cand->iv->step, cstep_common;
3720   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3721   tree common_type, var;
3722   tree uutype;
3723   aff_tree cbase_aff, var_aff;
3724   widest_int rat;
3725
3726   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3727     {
3728       /* We do not have a precision to express the values of use.  */
3729       return false;
3730     }
3731
3732   var = var_at_stmt (loop, cand, at);
3733   uutype = unsigned_type_for (utype);
3734
3735   /* If the conversion is not noop, perform it.  */
3736   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3737     {
3738       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3739           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3740         {
3741           tree inner_base, inner_step, inner_type;
3742           inner_base = TREE_OPERAND (cbase, 0);
3743           if (CONVERT_EXPR_P (cstep))
3744             inner_step = TREE_OPERAND (cstep, 0);
3745           else
3746             inner_step = cstep;
3747
3748           inner_type = TREE_TYPE (inner_base);
3749           /* If candidate is added from a biv whose type is smaller than
3750              ctype, we know both candidate and the biv won't overflow.
3751              In this case, it's safe to skip the convertion in candidate.
3752              As an example, (unsigned short)((unsigned long)A) equals to
3753              (unsigned short)A, if A has a type no larger than short.  */
3754           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3755             {
3756               cbase = inner_base;
3757               cstep = inner_step;
3758             }
3759         }
3760       cstep = fold_convert (uutype, cstep);
3761       cbase = fold_convert (uutype, cbase);
3762       var = fold_convert (uutype, var);
3763     }
3764
3765   /* Ratio is 1 when computing the value of biv cand by itself.
3766      We can't rely on constant_multiple_of in this case because the
3767      use is created after the original biv is selected.  The call
3768      could fail because of inconsistent fold behavior.  See PR68021
3769      for more information.  */
3770   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3771     {
3772       gcc_assert (is_gimple_assign (use->stmt));
3773       gcc_assert (use->iv->ssa_name == cand->var_after);
3774       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3775       rat = 1;
3776     }
3777   else if (!constant_multiple_of (ustep, cstep, &rat))
3778     return false;
3779
3780   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3781      type, we achieve better folding by computing their difference in this
3782      wider type, and cast the result to UUTYPE.  We do not need to worry about
3783      overflows, as all the arithmetics will in the end be performed in UUTYPE
3784      anyway.  */
3785   common_type = determine_common_wider_type (&ubase, &cbase);
3786
3787   /* use = ubase - ratio * cbase + ratio * var.  */
3788   tree_to_aff_combination (ubase, common_type, aff);
3789   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3790   tree_to_aff_combination (var, uutype, &var_aff);
3791
3792   /* We need to shift the value if we are after the increment.  */
3793   if (stmt_after_increment (loop, cand, at))
3794     {
3795       aff_tree cstep_aff;
3796
3797       if (common_type != uutype)
3798         cstep_common = fold_convert (common_type, cstep);
3799       else
3800         cstep_common = cstep;
3801
3802       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3803       aff_combination_add (&cbase_aff, &cstep_aff);
3804     }
3805
3806   aff_combination_scale (&cbase_aff, -rat);
3807   aff_combination_add (aff, &cbase_aff);
3808   if (common_type != uutype)
3809     aff_combination_convert (aff, uutype);
3810
3811   aff_combination_scale (&var_aff, rat);
3812   aff_combination_add (aff, &var_aff);
3813
3814   return true;
3815 }
3816
3817 /* Return the type of USE.  */
3818
3819 static tree
3820 get_use_type (struct iv_use *use)
3821 {
3822   tree base_type = TREE_TYPE (use->iv->base);
3823   tree type;
3824
3825   if (use->type == USE_ADDRESS)
3826     {
3827       /* The base_type may be a void pointer.  Create a pointer type based on
3828          the mem_ref instead.  */
3829       type = build_pointer_type (TREE_TYPE (*use->op_p));
3830       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3831                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3832     }
3833   else
3834     type = base_type;
3835
3836   return type;
3837 }
3838
3839 /* Determines the expression by that USE is expressed from induction variable
3840    CAND at statement AT in LOOP.  The computation is unshared.  */
3841
3842 static tree
3843 get_computation_at (struct loop *loop,
3844                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3845 {
3846   aff_tree aff;
3847   tree type = get_use_type (use);
3848
3849   if (!get_computation_aff (loop, use, cand, at, &aff))
3850     return NULL_TREE;
3851   unshare_aff_combination (&aff);
3852   return fold_convert (type, aff_combination_to_tree (&aff));
3853 }
3854
3855 /* Determines the expression by that USE is expressed from induction variable
3856    CAND in LOOP.  The computation is unshared.  */
3857
3858 static tree
3859 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3860 {
3861   return get_computation_at (loop, use, cand, use->stmt);
3862 }
3863
3864 /* Adjust the cost COST for being in loop setup rather than loop body.
3865    If we're optimizing for space, the loop setup overhead is constant;
3866    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3867 static unsigned
3868 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3869 {
3870   if (cost == INFTY)
3871     return cost;
3872   else if (optimize_loop_for_speed_p (data->current_loop))
3873     return cost / avg_loop_niter (data->current_loop);
3874   else
3875     return cost;
3876 }
3877
3878 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3879    validity for a memory reference accessing memory of mode MODE in
3880    address space AS.  */
3881
3882
3883 bool
3884 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3885                                  addr_space_t as)
3886 {
3887 #define MAX_RATIO 128
3888   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3889   static vec<sbitmap> valid_mult_list;
3890   sbitmap valid_mult;
3891
3892   if (data_index >= valid_mult_list.length ())
3893     valid_mult_list.safe_grow_cleared (data_index + 1);
3894
3895   valid_mult = valid_mult_list[data_index];
3896   if (!valid_mult)
3897     {
3898       machine_mode address_mode = targetm.addr_space.address_mode (as);
3899       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3900       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3901       rtx addr, scaled;
3902       HOST_WIDE_INT i;
3903
3904       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3905       bitmap_clear (valid_mult);
3906       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3907       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3908       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3909         {
3910           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3911           if (memory_address_addr_space_p (mode, addr, as)
3912               || memory_address_addr_space_p (mode, scaled, as))
3913             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3914         }
3915
3916       if (dump_file && (dump_flags & TDF_DETAILS))
3917         {
3918           fprintf (dump_file, "  allowed multipliers:");
3919           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3920             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3921               fprintf (dump_file, " %d", (int) i);
3922           fprintf (dump_file, "\n");
3923           fprintf (dump_file, "\n");
3924         }
3925
3926       valid_mult_list[data_index] = valid_mult;
3927     }
3928
3929   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3930     return false;
3931
3932   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3933 }
3934
/* The description below applies to get_address_cost, which is defined after
   the helper types that follow.

   Returns the cost of an address of the shape
   symbol + var + OFFSET + RATIO * index.  If SYMBOL_PRESENT is false, the
   symbol is omitted; if VAR_PRESENT is false, the variable is omitted.  The
   cost is computed for a memory reference that accesses a memory location
   of mode MEM_MODE in address space AS.

   If MAY_AUTOINC is non-NULL, *MAY_AUTOINC is set to true if an
   autoincrement addressing mode (increasing the index by the size of
   MEM_MODE / RATIO) is available, and to false otherwise.  To make this
   determination, we look at the size of the increment to be made, which is
   given in CSTEP.  CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  All of this is quite crude.  */
3948
/* Kinds of auto-increment/decrement addressing.  AINC_NONE doubles as the
   number of real kinds (it sizes address_cost_data::ainc_costs), so it has
   to stay the last enumerator.  */
enum ainc_type
{
  /* Pre increment.  */
  AINC_PRE_INC,
  /* Pre decrement.  */
  AINC_PRE_DEC,
  /* Post increment.  */
  AINC_POST_INC,
  /* Post decrement.  */
  AINC_POST_DEC,
  /* Also the number of auto increment types.  */
  AINC_NONE
};
3957
3958 struct address_cost_data
3959 {
3960   HOST_WIDE_INT min_offset, max_offset;
3961   unsigned costs[2][2][2][2];
3962   unsigned ainc_costs[AINC_NONE];
3963 };
3964
3965
3966 static comp_cost
3967 get_address_cost (bool symbol_present, bool var_present,
3968                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3969                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3970                   addr_space_t as, bool speed,
3971                   bool stmt_after_inc, bool *may_autoinc)
3972 {
3973   machine_mode address_mode = targetm.addr_space.address_mode (as);
3974   static vec<address_cost_data *> address_cost_data_list;
3975   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3976   address_cost_data *data;
3977   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3978   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3979   unsigned cost, acost, complexity;
3980   enum ainc_type autoinc_type;
3981   bool offset_p, ratio_p, autoinc;
3982   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3983   unsigned HOST_WIDE_INT mask;
3984   unsigned bits;
3985
3986   if (data_index >= address_cost_data_list.length ())
3987     address_cost_data_list.safe_grow_cleared (data_index + 1);
3988
3989   data = address_cost_data_list[data_index];
3990   if (!data)
3991     {
3992       HOST_WIDE_INT i;
3993       HOST_WIDE_INT rat, off = 0;
3994       int old_cse_not_expected, width;
3995       unsigned sym_p, var_p, off_p, rat_p, add_c;
3996       rtx_insn *seq;
3997       rtx addr, base;
3998       rtx reg0, reg1;
3999
4000       data = (address_cost_data *) xcalloc (1, sizeof (*data));
4001
4002       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4003
4004       width = GET_MODE_BITSIZE (address_mode) - 1;
4005       if (width > (HOST_BITS_PER_WIDE_INT - 1))
4006         width = HOST_BITS_PER_WIDE_INT - 1;
4007       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
4008
4009       for (i = width; i >= 0; i--)
4010         {
4011           off = -(HOST_WIDE_INT_1U << i);
4012           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013           if (memory_address_addr_space_p (mem_mode, addr, as))
4014             break;
4015         }
4016       data->min_offset = (i == -1? 0 : off);
4017
4018       for (i = width; i >= 0; i--)
4019         {
4020           off = (HOST_WIDE_INT_1U << i) - 1;
4021           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4022           if (memory_address_addr_space_p (mem_mode, addr, as))
4023             break;
4024           /* For some strict-alignment targets, the offset must be naturally
4025              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4026           off = mem_mode != QImode
4027                 ? (HOST_WIDE_INT_1U << i)
4028                     - GET_MODE_SIZE (mem_mode)
4029                 : 0;
4030           if (off > 0)
4031             {
4032               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4033               if (memory_address_addr_space_p (mem_mode, addr, as))
4034                 break;
4035             }
4036         }
4037       if (i == -1)
4038         off = 0;
4039       data->max_offset = off;
4040
4041       if (dump_file && (dump_flags & TDF_DETAILS))
4042         {
4043           fprintf (dump_file, "get_address_cost:\n");
4044           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4045                    GET_MODE_NAME (mem_mode),
4046                    data->min_offset);
4047           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4048                    GET_MODE_NAME (mem_mode),
4049                    data->max_offset);
4050         }
4051
4052       rat = 1;
4053       for (i = 2; i <= MAX_RATIO; i++)
4054         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4055           {
4056             rat = i;
4057             break;
4058           }
4059
4060       /* Compute the cost of various addressing modes.  */
4061       acost = 0;
4062       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4063       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4064
4065       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4066           || USE_STORE_PRE_DECREMENT (mem_mode))
4067         {
4068           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4069           has_predec[mem_mode]
4070             = memory_address_addr_space_p (mem_mode, addr, as);
4071
4072           if (has_predec[mem_mode])
4073             data->ainc_costs[AINC_PRE_DEC]
4074               = address_cost (addr, mem_mode, as, speed);
4075         }
4076       if (USE_LOAD_POST_DECREMENT (mem_mode)
4077           || USE_STORE_POST_DECREMENT (mem_mode))
4078         {
4079           addr = gen_rtx_POST_DEC (address_mode, reg0);
4080           has_postdec[mem_mode]
4081             = memory_address_addr_space_p (mem_mode, addr, as);
4082
4083           if (has_postdec[mem_mode])
4084             data->ainc_costs[AINC_POST_DEC]
4085               = address_cost (addr, mem_mode, as, speed);
4086         }
4087       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4088           || USE_STORE_PRE_DECREMENT (mem_mode))
4089         {
4090           addr = gen_rtx_PRE_INC (address_mode, reg0);
4091           has_preinc[mem_mode]
4092             = memory_address_addr_space_p (mem_mode, addr, as);
4093
4094           if (has_preinc[mem_mode])
4095             data->ainc_costs[AINC_PRE_INC]
4096               = address_cost (addr, mem_mode, as, speed);
4097         }
4098       if (USE_LOAD_POST_INCREMENT (mem_mode)
4099           || USE_STORE_POST_INCREMENT (mem_mode))
4100         {
4101           addr = gen_rtx_POST_INC (address_mode, reg0);
4102           has_postinc[mem_mode]
4103             = memory_address_addr_space_p (mem_mode, addr, as);
4104
4105           if (has_postinc[mem_mode])
4106             data->ainc_costs[AINC_POST_INC]
4107               = address_cost (addr, mem_mode, as, speed);
4108         }
4109       for (i = 0; i < 16; i++)
4110         {
4111           sym_p = i & 1;
4112           var_p = (i >> 1) & 1;
4113           off_p = (i >> 2) & 1;
4114           rat_p = (i >> 3) & 1;
4115
4116           addr = reg0;
4117           if (rat_p)
4118             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4119                                    gen_int_mode (rat, address_mode));
4120
4121           if (var_p)
4122             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4123
4124           if (sym_p)
4125             {
4126               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4127               /* ??? We can run into trouble with some backends by presenting
4128                  it with symbols which haven't been properly passed through
4129                  targetm.encode_section_info.  By setting the local bit, we
4130                  enhance the probability of things working.  */
4131               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4132
4133               if (off_p)
4134                 base = gen_rtx_fmt_e (CONST, address_mode,
4135                                       gen_rtx_fmt_ee
4136                                         (PLUS, address_mode, base,
4137                                          gen_int_mode (off, address_mode)));
4138             }
4139           else if (off_p)
4140             base = gen_int_mode (off, address_mode);
4141           else
4142             base = NULL_RTX;
4143
4144           if (base)
4145             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4146
4147           start_sequence ();
4148           /* To avoid splitting addressing modes, pretend that no cse will
4149              follow.  */
4150           old_cse_not_expected = cse_not_expected;
4151           cse_not_expected = true;
4152           addr = memory_address_addr_space (mem_mode, addr, as);
4153           cse_not_expected = old_cse_not_expected;
4154           seq = get_insns ();
4155           end_sequence ();
4156
4157           acost = seq_cost (seq, speed);
4158           acost += address_cost (addr, mem_mode, as, speed);
4159
4160           if (!acost)
4161             acost = 1;
4162           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4163         }
4164
4165       /* On some targets, it is quite expensive to load symbol to a register,
4166          which makes addresses that contain symbols look much more expensive.
4167          However, the symbol will have to be loaded in any case before the
4168          loop (and quite likely we have it in register already), so it does not
4169          make much sense to penalize them too heavily.  So make some final
4170          tweaks for the SYMBOL_PRESENT modes:
4171
4172          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4173          var is cheaper, use this mode with small penalty.
4174          If VAR_PRESENT is true, try whether the mode with
4175          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4176          if this is the case, use it.  */
4177       add_c = add_cost (speed, address_mode);
4178       for (i = 0; i < 8; i++)
4179         {
4180           var_p = i & 1;
4181           off_p = (i >> 1) & 1;
4182           rat_p = (i >> 2) & 1;
4183
4184           acost = data->costs[0][1][off_p][rat_p] + 1;
4185           if (var_p)
4186             acost += add_c;
4187
4188           if (acost < data->costs[1][var_p][off_p][rat_p])
4189             data->costs[1][var_p][off_p][rat_p] = acost;
4190         }
4191
4192       if (dump_file && (dump_flags & TDF_DETAILS))
4193         {
4194           fprintf (dump_file, "<Address Costs>:\n");
4195
4196           for (i = 0; i < 16; i++)
4197             {
4198               sym_p = i & 1;
4199               var_p = (i >> 1) & 1;
4200               off_p = (i >> 2) & 1;
4201               rat_p = (i >> 3) & 1;
4202
4203               fprintf (dump_file, "  ");
4204               if (sym_p)
4205                 fprintf (dump_file, "sym + ");
4206               if (var_p)
4207                 fprintf (dump_file, "var + ");
4208               if (off_p)
4209                 fprintf (dump_file, "cst + ");
4210               if (rat_p)
4211                 fprintf (dump_file, "rat * ");
4212
4213               acost = data->costs[sym_p][var_p][off_p][rat_p];
4214               fprintf (dump_file, "index costs %d\n", acost);
4215             }
4216           if (has_predec[mem_mode] || has_postdec[mem_mode]
4217               || has_preinc[mem_mode] || has_postinc[mem_mode])
4218             fprintf (dump_file, "  May include autoinc/dec\n");
4219           fprintf (dump_file, "\n");
4220         }
4221
4222       address_cost_data_list[data_index] = data;
4223     }
4224
4225   bits = GET_MODE_BITSIZE (address_mode);
4226   mask = ~(HOST_WIDE_INT_M1U << (bits - 1) << 1);
4227   offset &= mask;
4228   if ((offset >> (bits - 1) & 1))
4229     offset |= ~mask;
4230   s_offset = offset;
4231
4232   autoinc = false;
4233   autoinc_type = AINC_NONE;
4234   msize = GET_MODE_SIZE (mem_mode);
4235   autoinc_offset = offset;
4236   if (stmt_after_inc)
4237     autoinc_offset += ratio * cstep;
4238   if (symbol_present || var_present || ratio != 1)
4239     autoinc = false;
4240   else
4241     {
4242       if (has_postinc[mem_mode] && autoinc_offset == 0
4243           && msize == cstep)
4244         autoinc_type = AINC_POST_INC;
4245       else if (has_postdec[mem_mode] && autoinc_offset == 0
4246                && msize == -cstep)
4247         autoinc_type = AINC_POST_DEC;
4248       else if (has_preinc[mem_mode] && autoinc_offset == msize
4249                && msize == cstep)
4250         autoinc_type = AINC_PRE_INC;
4251       else if (has_predec[mem_mode] && autoinc_offset == -msize
4252                && msize == -cstep)
4253         autoinc_type = AINC_PRE_DEC;
4254
4255       if (autoinc_type != AINC_NONE)
4256         autoinc = true;
4257     }
4258
4259   cost = 0;
4260   offset_p = (s_offset != 0
4261               && data->min_offset <= s_offset
4262               && s_offset <= data->max_offset);
4263   ratio_p = (ratio != 1
4264              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4265
4266   if (ratio != 1 && !ratio_p)
4267     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4268
4269   if (s_offset && !offset_p && !symbol_present)
4270     cost += add_cost (speed, address_mode);
4271
4272   if (may_autoinc)
4273     *may_autoinc = autoinc;
4274   if (autoinc)
4275     acost = data->ainc_costs[autoinc_type];
4276   else
4277     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4278   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4279   return comp_cost (cost + acost, complexity);
4280 }
4281
4282  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4283     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4284     calculating the operands of EXPR.  Returns true if successful, and returns
4285     the cost in COST.  */
4286
4287 static bool
4288 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4289                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4290 {
4291   comp_cost res;
4292   tree op1 = TREE_OPERAND (expr, 1);
4293   tree cst = TREE_OPERAND (mult, 1);
4294   tree multop = TREE_OPERAND (mult, 0);
4295   int m = exact_log2 (int_cst_value (cst));
4296   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4297   int as_cost, sa_cost;
4298   bool mult_in_op1;
4299
4300   if (!(m >= 0 && m < maxm))
4301     return false;
4302
4303   STRIP_NOPS (op1);
4304   mult_in_op1 = operand_equal_p (op1, mult, 0);
4305
4306   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4307
4308   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4309      use that in preference to a shift insn followed by an add insn.  */
4310   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4311              ? shiftadd_cost (speed, mode, m)
4312              : (mult_in_op1
4313                 ? shiftsub1_cost (speed, mode, m)
4314                 : shiftsub0_cost (speed, mode, m)));
4315
4316   res = comp_cost (MIN (as_cost, sa_cost), 0);
4317   res += (mult_in_op1 ? cost0 : cost1);
4318
4319   STRIP_NOPS (multop);
4320   if (!is_gimple_val (multop))
4321     res += force_expr_to_var_cost (multop, speed);
4322
4323   *cost = res;
4324   return true;
4325 }
4326
4327 /* Estimates cost of forcing expression EXPR into a variable.  */
4328
4329 static comp_cost
4330 force_expr_to_var_cost (tree expr, bool speed)
4331 {
4332   static bool costs_initialized = false;
4333   static unsigned integer_cost [2];
4334   static unsigned symbol_cost [2];
4335   static unsigned address_cost [2];
4336   tree op0, op1;
4337   comp_cost cost0, cost1, cost;
4338   machine_mode mode;
4339
4340   if (!costs_initialized)
4341     {
4342       tree type = build_pointer_type (integer_type_node);
4343       tree var, addr;
4344       rtx x;
4345       int i;
4346
4347       var = create_tmp_var_raw (integer_type_node, "test_var");
4348       TREE_STATIC (var) = 1;
4349       x = produce_memory_decl_rtl (var, NULL);
4350       SET_DECL_RTL (var, x);
4351
4352       addr = build1 (ADDR_EXPR, type, var);
4353
4354
4355       for (i = 0; i < 2; i++)
4356         {
4357           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4358                                                              2000), i);
4359
4360           symbol_cost[i] = computation_cost (addr, i) + 1;
4361
4362           address_cost[i]
4363             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4364           if (dump_file && (dump_flags & TDF_DETAILS))
4365             {
4366               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4367               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4368               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4369               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4370               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4371               fprintf (dump_file, "\n");
4372             }
4373         }
4374
4375       costs_initialized = true;
4376     }
4377
4378   STRIP_NOPS (expr);
4379
4380   if (SSA_VAR_P (expr))
4381     return no_cost;
4382
4383   if (is_gimple_min_invariant (expr))
4384     {
4385       if (TREE_CODE (expr) == INTEGER_CST)
4386         return comp_cost (integer_cost [speed], 0);
4387
4388       if (TREE_CODE (expr) == ADDR_EXPR)
4389         {
4390           tree obj = TREE_OPERAND (expr, 0);
4391
4392           if (VAR_P (obj)
4393               || TREE_CODE (obj) == PARM_DECL
4394               || TREE_CODE (obj) == RESULT_DECL)
4395             return comp_cost (symbol_cost [speed], 0);
4396         }
4397
4398       return comp_cost (address_cost [speed], 0);
4399     }
4400
4401   switch (TREE_CODE (expr))
4402     {
4403     case POINTER_PLUS_EXPR:
4404     case PLUS_EXPR:
4405     case MINUS_EXPR:
4406     case MULT_EXPR:
4407       op0 = TREE_OPERAND (expr, 0);
4408       op1 = TREE_OPERAND (expr, 1);
4409       STRIP_NOPS (op0);
4410       STRIP_NOPS (op1);
4411       break;
4412
4413     CASE_CONVERT:
4414     case NEGATE_EXPR:
4415       op0 = TREE_OPERAND (expr, 0);
4416       STRIP_NOPS (op0);
4417       op1 = NULL_TREE;
4418       break;
4419
4420     default:
4421       /* Just an arbitrary value, FIXME.  */
4422       return comp_cost (target_spill_cost[speed], 0);
4423     }
4424
4425   if (op0 == NULL_TREE
4426       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4427     cost0 = no_cost;
4428   else
4429     cost0 = force_expr_to_var_cost (op0, speed);
4430
4431   if (op1 == NULL_TREE
4432       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4433     cost1 = no_cost;
4434   else
4435     cost1 = force_expr_to_var_cost (op1, speed);
4436
4437   mode = TYPE_MODE (TREE_TYPE (expr));
4438   switch (TREE_CODE (expr))
4439     {
4440     case POINTER_PLUS_EXPR:
4441     case PLUS_EXPR:
4442     case MINUS_EXPR:
4443     case NEGATE_EXPR:
4444       cost = comp_cost (add_cost (speed, mode), 0);
4445       if (TREE_CODE (expr) != NEGATE_EXPR)
4446         {
4447           tree mult = NULL_TREE;
4448           comp_cost sa_cost;
4449           if (TREE_CODE (op1) == MULT_EXPR)
4450             mult = op1;
4451           else if (TREE_CODE (op0) == MULT_EXPR)
4452             mult = op0;
4453
4454           if (mult != NULL_TREE
4455               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4456               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4457                                     speed, &sa_cost))
4458             return sa_cost;
4459         }
4460       break;
4461
4462     CASE_CONVERT:
4463       {
4464         tree inner_mode, outer_mode;
4465         outer_mode = TREE_TYPE (expr);
4466         inner_mode = TREE_TYPE (op0);
4467         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4468                                        TYPE_MODE (inner_mode), speed), 0);
4469       }
4470       break;
4471
4472     case MULT_EXPR:
4473       if (cst_and_fits_in_hwi (op0))
4474         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4475                                              mode, speed), 0);
4476       else if (cst_and_fits_in_hwi (op1))
4477         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4478                                              mode, speed), 0);
4479       else
4480         return comp_cost (target_spill_cost [speed], 0);
4481       break;
4482
4483     default:
4484       gcc_unreachable ();
4485     }
4486
4487   cost += cost0;
4488   cost += cost1;
4489
4490   /* Bound the cost by target_spill_cost.  The parts of complicated
4491      computations often are either loop invariant or at least can
4492      be shared between several iv uses, so letting this grow without
4493      limits would not give reasonable results.  */
4494   if (cost.cost > (int) target_spill_cost [speed])
4495     cost.cost = target_spill_cost [speed];
4496
4497   return cost;
4498 }
4499
4500 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4501    invariants the computation depends on.  */
4502
4503 static comp_cost
4504 force_var_cost (struct ivopts_data *data,
4505                 tree expr, bitmap *depends_on)
4506 {
4507   if (depends_on)
4508     {
4509       fd_ivopts_data = data;
4510       walk_tree (&expr, find_depends, depends_on, NULL);
4511     }
4512
4513   return force_expr_to_var_cost (expr, data->speed);
4514 }
4515
4516 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4517    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4518    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4519    invariants the computation depends on.  */
4520
4521 static comp_cost
4522 split_address_cost (struct ivopts_data *data,
4523                     tree addr, bool *symbol_present, bool *var_present,
4524                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4525 {
4526   tree core;
4527   HOST_WIDE_INT bitsize;
4528   HOST_WIDE_INT bitpos;
4529   tree toffset;
4530   machine_mode mode;
4531   int unsignedp, reversep, volatilep;
4532
4533   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4534                               &unsignedp, &reversep, &volatilep);
4535
4536   if (toffset != 0
4537       || bitpos % BITS_PER_UNIT != 0
4538       || reversep
4539       || !VAR_P (core))
4540     {
4541       *symbol_present = false;
4542       *var_present = true;
4543       fd_ivopts_data = data;
4544       if (depends_on)
4545         walk_tree (&addr, find_depends, depends_on, NULL);
4546
4547       return comp_cost (target_spill_cost[data->speed], 0);
4548     }
4549
4550   *offset += bitpos / BITS_PER_UNIT;
4551   if (TREE_STATIC (core)
4552       || DECL_EXTERNAL (core))
4553     {
4554       *symbol_present = true;
4555       *var_present = false;
4556       return no_cost;
4557     }
4558
4559   *symbol_present = false;
4560   *var_present = true;
4561   return no_cost;
4562 }
4563
4564 /* Estimates cost of expressing difference of addresses E1 - E2 as
4565    var + symbol + offset.  The value of offset is added to OFFSET,
4566    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4567    part is missing.  DEPENDS_ON is a set of the invariants the computation
4568    depends on.  */
4569
4570 static comp_cost
4571 ptr_difference_cost (struct ivopts_data *data,
4572                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4573                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4574 {
4575   HOST_WIDE_INT diff = 0;
4576   aff_tree aff_e1, aff_e2;
4577   tree type;
4578
4579   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4580
4581   if (ptr_difference_const (e1, e2, &diff))
4582     {
4583       *offset += diff;
4584       *symbol_present = false;
4585       *var_present = false;
4586       return no_cost;
4587     }
4588
4589   if (integer_zerop (e2))
4590     return split_address_cost (data, TREE_OPERAND (e1, 0),
4591                                symbol_present, var_present, offset, depends_on);
4592
4593   *symbol_present = false;
4594   *var_present = true;
4595
4596   type = signed_type_for (TREE_TYPE (e1));
4597   tree_to_aff_combination (e1, type, &aff_e1);
4598   tree_to_aff_combination (e2, type, &aff_e2);
4599   aff_combination_scale (&aff_e2, -1);
4600   aff_combination_add (&aff_e1, &aff_e2);
4601
4602   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4603 }
4604
4605 /* Estimates cost of expressing difference E1 - E2 as
4606    var + symbol + offset.  The value of offset is added to OFFSET,
4607    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4608    part is missing.  DEPENDS_ON is a set of the invariants the computation
4609    depends on.  */
4610
4611 static comp_cost
4612 difference_cost (struct ivopts_data *data,
4613                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4614                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4615 {
4616   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4617   unsigned HOST_WIDE_INT off1, off2;
4618   aff_tree aff_e1, aff_e2;
4619   tree type;
4620
4621   e1 = strip_offset (e1, &off1);
4622   e2 = strip_offset (e2, &off2);
4623   *offset += off1 - off2;
4624
4625   STRIP_NOPS (e1);
4626   STRIP_NOPS (e2);
4627
4628   if (TREE_CODE (e1) == ADDR_EXPR)
4629     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4630                                 offset, depends_on);
4631   *symbol_present = false;
4632
4633   if (operand_equal_p (e1, e2, 0))
4634     {
4635       *var_present = false;
4636       return no_cost;
4637     }
4638
4639   *var_present = true;
4640
4641   if (integer_zerop (e2))
4642     return force_var_cost (data, e1, depends_on);
4643
4644   if (integer_zerop (e1))
4645     {
4646       comp_cost cost = force_var_cost (data, e2, depends_on);
4647       cost += mult_by_coeff_cost (-1, mode, data->speed);
4648       return cost;
4649     }
4650
4651   type = signed_type_for (TREE_TYPE (e1));
4652   tree_to_aff_combination (e1, type, &aff_e1);
4653   tree_to_aff_combination (e2, type, &aff_e2);
4654   aff_combination_scale (&aff_e2, -1);
4655   aff_combination_add (&aff_e1, &aff_e2);
4656
4657   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4658 }
4659
4660 /* Returns true if AFF1 and AFF2 are identical.  */
4661
4662 static bool
4663 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4664 {
4665   unsigned i;
4666
4667   if (aff1->n != aff2->n)
4668     return false;
4669
4670   for (i = 0; i < aff1->n; i++)
4671     {
4672       if (aff1->elts[i].coef != aff2->elts[i].coef)
4673         return false;
4674
4675       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4676         return false;
4677     }
4678   return true;
4679 }
4680
4681 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4682
4683 static iv_inv_expr_ent *
4684 record_inv_expr (struct ivopts_data *data, tree expr)
4685 {
4686   struct iv_inv_expr_ent ent;
4687   struct iv_inv_expr_ent **slot;
4688
4689   ent.expr = expr;
4690   ent.hash = iterative_hash_expr (expr, 0);
4691   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4692
4693   if (!*slot)
4694     {
4695       *slot = XNEW (struct iv_inv_expr_ent);
4696       (*slot)->expr = expr;
4697       (*slot)->hash = ent.hash;
4698       (*slot)->id = data->max_inv_expr_id++;
4699     }
4700
4701   return *slot;
4702 }
4703
/* Returns the invariant expression if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns NULL otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */
4708
4709 static iv_inv_expr_ent *
4710 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4711                          tree cbase, HOST_WIDE_INT ratio,
4712                          bool address_p)
4713 {
4714   aff_tree ubase_aff, cbase_aff;
4715   tree expr, ub, cb;
4716
4717   STRIP_NOPS (ubase);
4718   STRIP_NOPS (cbase);
4719   ub = ubase;
4720   cb = cbase;
4721
4722   if ((TREE_CODE (ubase) == INTEGER_CST)
4723       && (TREE_CODE (cbase) == INTEGER_CST))
4724     return NULL;
4725
4726   /* Strips the constant part. */
4727   if (TREE_CODE (ubase) == PLUS_EXPR
4728       || TREE_CODE (ubase) == MINUS_EXPR
4729       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4730     {
4731       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4732         ubase = TREE_OPERAND (ubase, 0);
4733     }
4734
4735   /* Strips the constant part. */
4736   if (TREE_CODE (cbase) == PLUS_EXPR
4737       || TREE_CODE (cbase) == MINUS_EXPR
4738       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4739     {
4740       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4741         cbase = TREE_OPERAND (cbase, 0);
4742     }
4743
4744   if (address_p)
4745     {
4746       if (((TREE_CODE (ubase) == SSA_NAME)
4747            || (TREE_CODE (ubase) == ADDR_EXPR
4748                && is_gimple_min_invariant (ubase)))
4749           && (TREE_CODE (cbase) == INTEGER_CST))
4750         return NULL;
4751
4752       if (((TREE_CODE (cbase) == SSA_NAME)
4753            || (TREE_CODE (cbase) == ADDR_EXPR
4754                && is_gimple_min_invariant (cbase)))
4755           && (TREE_CODE (ubase) == INTEGER_CST))
4756         return NULL;
4757     }
4758
4759   if (ratio == 1)
4760     {
4761       if (operand_equal_p (ubase, cbase, 0))
4762         return NULL;
4763
4764       if (TREE_CODE (ubase) == ADDR_EXPR
4765           && TREE_CODE (cbase) == ADDR_EXPR)
4766         {
4767           tree usym, csym;
4768
4769           usym = TREE_OPERAND (ubase, 0);
4770           csym = TREE_OPERAND (cbase, 0);
4771           if (TREE_CODE (usym) == ARRAY_REF)
4772             {
4773               tree ind = TREE_OPERAND (usym, 1);
4774               if (TREE_CODE (ind) == INTEGER_CST
4775                   && tree_fits_shwi_p (ind)
4776                   && tree_to_shwi (ind) == 0)
4777                 usym = TREE_OPERAND (usym, 0);
4778             }
4779           if (TREE_CODE (csym) == ARRAY_REF)
4780             {
4781               tree ind = TREE_OPERAND (csym, 1);
4782               if (TREE_CODE (ind) == INTEGER_CST
4783                   && tree_fits_shwi_p (ind)
4784                   && tree_to_shwi (ind) == 0)
4785                 csym = TREE_OPERAND (csym, 0);
4786             }
4787           if (operand_equal_p (usym, csym, 0))
4788             return NULL;
4789         }
4790       /* Now do more complex comparison  */
4791       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4792       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4793       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4794         return NULL;
4795     }
4796
4797   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4798   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4799
4800   aff_combination_scale (&cbase_aff, -1 * ratio);
4801   aff_combination_add (&ubase_aff, &cbase_aff);
4802   expr = aff_combination_to_tree (&ubase_aff);
4803   return record_inv_expr (data, expr);
4804 }
4805
4806 /* Scale (multiply) the computed COST (except scratch part that should be
4807    hoisted out a loop) by the frequency of the basic block of AT divided
4808    by the frequency of the loop header, which makes expected cost more
        accurate.  The scaling is skipped when the frequency of the header of
        DATA->current_loop is zero.  CAND is only used in the dump output.
        Returns the (possibly adjusted) cost.  */
4809
4810 static comp_cost
4811 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
4812                                 comp_cost cost)
4813 {
4814    int loop_freq = data->current_loop->header->frequency;
4815    int bb_freq = gimple_bb (at)->frequency;
4816    if (loop_freq != 0)
4817      {
         /* Only the part of the cost above the scratch part is scaled.  */
4818        gcc_assert (cost.scratch <= cost.cost);
4819        int scaled_cost
4820          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4821
4822        if (dump_file && (dump_flags & TDF_DETAILS))
4823          fprintf (dump_file, "Scaling iv_use based on cand %d "
4824                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4825                   cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
4826                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4827
4828        cost.cost = scaled_cost;
4829      }
4830
4831   return cost;
4832 }
4833
4834 /* Determines the cost of the computation by that USE is expressed
4835    from induction variable CAND.  If ADDRESS_P is true, we just need
4836    to create an address from it, otherwise we want to get it into
4837    register.  A set of invariants we depend on is stored in
4838    DEPENDS_ON.  AT is the statement at that the value is computed.
4839    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4840    addressing is likely.  If INV_EXPR and DEPENDS_ON are nonnull and
        *DEPENDS_ON was set, *INV_EXPR receives the result of
        get_loop_invariant_expr; when that result is non-NULL, *DEPENDS_ON
        is cleared.  *INV_EXPR is not written otherwise.  Returns
        infinite_cost if USE cannot be expressed from CAND; finite costs
        are passed through get_scaled_computation_cost_at.  */
4841
4842 static comp_cost
4843 get_computation_cost_at (struct ivopts_data *data,
4844                          struct iv_use *use, struct iv_cand *cand,
4845                          bool address_p, bitmap *depends_on, gimple *at,
4846                          bool *can_autoinc,
4847                          iv_inv_expr_ent **inv_expr)
4848 {
4849   tree ubase = use->iv->base, ustep = use->iv->step;
4850   tree cbase, cstep;
4851   tree utype = TREE_TYPE (ubase), ctype;
4852   unsigned HOST_WIDE_INT cstepi, offset = 0;
4853   HOST_WIDE_INT ratio, aratio;
4854   bool var_present, symbol_present, stmt_is_after_inc;
4855   comp_cost cost;
4856   widest_int rat;
4857   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4858   machine_mode mem_mode = (address_p
4859                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4860                                 : VOIDmode);
4861
4862   if (depends_on)
4863     *depends_on = NULL;
4864
4865   /* Only consider real candidates.  */
4866   if (!cand->iv)
4867     return infinite_cost;
4868
4869   cbase = cand->iv->base;
4870   cstep = cand->iv->step;
4871   ctype = TREE_TYPE (cbase);
4872
4873   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4874     {
4875       /* We do not have a precision to express the values of use.  */
4876       return infinite_cost;
4877     }
4878
4879   if (address_p
4880       || (use->iv->base_object
4881           && cand->iv->base_object
4882           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4883           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4884     {
4885       /* Do not try to express address of an object with computation based
4886          on address of a different object.  This may cause problems in rtl
4887          level alias analysis (that does not expect this to be happening,
4888          as this is illegal in C), and would be unlikely to be useful
4889          anyway.  */
4890       if (use->iv->base_object
4891           && cand->iv->base_object
4892           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4893         return infinite_cost;
4894     }
4895
4896   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4897     {
4898       /* TODO -- add direct handling of this case.  */
4899       goto fallback;
4900     }
4901
4902   /* CSTEPI is removed from the offset in case statement is after the
4903      increment.  If the step is not constant, we use zero instead.
4904      This is a bit imprecise (there is the extra addition), but
4905      redundancy elimination is likely to transform the code so that
4906      it uses value of the variable before increment anyway,
4907      so it is not that much unrealistic.  */
4908   if (cst_and_fits_in_hwi (cstep))
4909     cstepi = int_cst_value (cstep);
4910   else
4911     cstepi = 0;
4912
       /* The step of the use must be a constant multiple of the step of the
          candidate, and that multiple must fit in a HOST_WIDE_INT.  */
4913   if (!constant_multiple_of (ustep, cstep, &rat))
4914     return infinite_cost;
4915
4916   if (wi::fits_shwi_p (rat))
4917     ratio = rat.to_shwi ();
4918   else
4919     return infinite_cost;
4920
4921   STRIP_NOPS (cbase);
4922   ctype = TREE_TYPE (cbase);
4923
4924   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4925
4926   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4927      or ratio == 1, it is better to handle this like
4928
4929      ubase - ratio * cbase + ratio * var
4930
4931      (also holds in the case ratio == -1, TODO).  */
4932
4933   if (cst_and_fits_in_hwi (cbase))
4934     {
4935       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4936       cost = difference_cost (data,
4937                               ubase, build_int_cst (utype, 0),
4938                               &symbol_present, &var_present, &offset,
4939                               depends_on);
4940       cost /= avg_loop_niter (data->current_loop);
4941     }
4942   else if (ratio == 1)
4943     {
4944       tree real_cbase = cbase;
4945
4946       /* Check to see if any adjustment is needed.  */
4947       if (cstepi == 0 && stmt_is_after_inc)
4948         {
4949           aff_tree real_cbase_aff;
4950           aff_tree cstep_aff;
4951
4952           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4953                                    &real_cbase_aff);
4954           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4955
4956           aff_combination_add (&real_cbase_aff, &cstep_aff);
4957           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4958         }
4959
4960       cost = difference_cost (data,
4961                               ubase, real_cbase,
4962                               &symbol_present, &var_present, &offset,
4963                               depends_on);
4964       cost /= avg_loop_niter (data->current_loop);
4965     }
4966   else if (address_p
4967            && !POINTER_TYPE_P (ctype)
4968            && multiplier_allowed_in_address_p
4969                 (ratio, mem_mode,
4970                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4971     {
4972       tree real_cbase = cbase;
4973
4974       if (cstepi == 0 && stmt_is_after_inc)
4975         {
4976           /* CTYPE is not a pointer type here (that is required by the
4977              condition guarding this branch), so PLUS_EXPR is always
4978              the code to use for adding the step.  */
4979           real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4980         }
4981       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4982                                 build_int_cst (ctype, ratio));
4983       cost = difference_cost (data,
4984                               ubase, real_cbase,
4985                               &symbol_present, &var_present, &offset,
4986                               depends_on);
4987       cost /= avg_loop_niter (data->current_loop);
4988     }
4989   else
4990     {
4991       cost = force_var_cost (data, cbase, depends_on);
4992       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4993                                &symbol_present, &var_present, &offset,
4994                                depends_on);
4995       cost /= avg_loop_niter (data->current_loop);
4996       cost += add_cost (data->speed, TYPE_MODE (ctype));
4997     }
4998
4999   /* Record setup cost in scratch field.  */
5000   cost.scratch = cost.cost;
5001
5002   if (inv_expr && depends_on && *depends_on)
5003     {
5004       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
5005                                            address_p);
5006       /* Clear depends on.  */
5007       if (*inv_expr != NULL)
5008         bitmap_clear (*depends_on);
5009     }
5010
5011   /* If we are after the increment, the value of the candidate is higher by
5012      one iteration.  */
5013   if (stmt_is_after_inc)
5014     offset -= ratio * cstepi;
5015
5016   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5017      (symbol/var1/const parts may be omitted).  If we are looking for an
5018      address, find the cost of addressing this.  */
5019   if (address_p)
5020     {
5021       cost += get_address_cost (symbol_present, var_present,
5022                                 offset, ratio, cstepi,
5023                                 mem_mode,
5024                                 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5025                                 speed, stmt_is_after_inc, can_autoinc);
5026       return get_scaled_computation_cost_at (data, at, cand, cost);
5027     }
5028
5029   /* Otherwise estimate the costs for computing the expression.  */
5030   if (!symbol_present && !var_present && !offset)
5031     {
5032       if (ratio != 1)
5033         cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5034       return get_scaled_computation_cost_at (data, at, cand, cost);
5035     }
5036
5037   /* Symbol + offset should be compile-time computable so consider that they
5038       are added once to the variable, if present.  */
5039   if (var_present && (symbol_present || offset))
5040     cost += adjust_setup_cost (data,
5041                                     add_cost (speed, TYPE_MODE (ctype)));
5042
5043   /* Having offset does not affect runtime cost in case it is added to
5044      symbol, but it increases complexity.  */
5045   if (offset)
5046     cost.complexity++;
5047
5048   cost += add_cost (speed, TYPE_MODE (ctype));
5049
       /* The multiplication is costed with the absolute value of RATIO.  */
5050   aratio = ratio > 0 ? ratio : -ratio;
5051   if (aratio != 1)
5052     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5053
5054   return get_scaled_computation_cost_at (data, at, cand, cost);
5055
5056 fallback:
5057   if (can_autoinc)
5058     *can_autoinc = false;
5059
5060   /* Just get the expression, expand it and measure the cost.  */
5061   tree comp = get_computation_at (data->current_loop, use, cand, at);
5062
5063   if (!comp)
5064     return infinite_cost;
5065
5066   if (address_p)
5067     comp = build_simple_mem_ref (comp);
5068
5069   cost = comp_cost (computation_cost (comp, speed), 0);
5070
5071   return get_scaled_computation_cost_at (data, at, cand, cost);
5072 }
5073
5074 /* Determines the cost of the computation by that USE is expressed
5075    from induction variable CAND.  If ADDRESS_P is true, we just need
5076    to create an address from it, otherwise we want to get it into
5077    register.  A set of invariants we depend on is stored in
5078    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5079    autoinc addressing is likely.  INV_EXPR is passed through unchanged.
        This is get_computation_cost_at evaluated at the statement of USE.  */
5080
5081 static comp_cost
5082 get_computation_cost (struct ivopts_data *data,
5083                       struct iv_use *use, struct iv_cand *cand,
5084                       bool address_p, bitmap *depends_on,
5085                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5086 {
5087   return get_computation_cost_at (data,
5088                                   use, cand, address_p, depends_on, use->stmt,
5089                                   can_autoinc, inv_expr);
5090 }
5091
5092 /* Determines cost of computing the use in GROUP with CAND in a generic
5093    expression.  Only the first use of GROUP is examined.  The result is
        recorded with set_group_iv_cost.  Returns true if the cost is
        finite.  */
5094
5095 static bool
5096 determine_group_iv_cost_generic (struct ivopts_data *data,
5097                                  struct iv_group *group, struct iv_cand *cand)
5098 {
5099   comp_cost cost;
5100   iv_inv_expr_ent *inv_expr = NULL;
5101   bitmap depends_on = NULL;
5102   struct iv_use *use = group->vuses[0];
5103
5104   /* The simple case first -- if we need to express value of the preserved
5105      original biv, the cost is 0.  This also prevents us from counting the
5106      cost of increment twice -- once at this use and once in the cost of
5107      the candidate.  */
5108   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5109     cost = no_cost;
5110   else
5111     cost = get_computation_cost (data, use, cand, false,
5112                                  &depends_on, NULL, &inv_expr);
5113
5114   set_group_iv_cost (data, group, cand, cost, depends_on,
5115                      NULL_TREE, ERROR_MARK, inv_expr);
5116   return !cost.infinite_cost_p ();
5117 }
5118
5119 /* Determines cost of computing uses in GROUP with CAND in addresses.
        The costs of all uses of GROUP are summed and recorded with
        set_group_iv_cost; the dependence bitmap and the invariant
        expression recorded are those computed for the first use.
        Returns true if the summed cost is finite.  */
5120
5121 static bool
5122 determine_group_iv_cost_address (struct ivopts_data *data,
5123                                  struct iv_group *group, struct iv_cand *cand)
5124 {
5125   unsigned i;
5126   bitmap depends_on = NULL;
5127   bool can_autoinc = false;
5128   iv_inv_expr_ent *inv_expr = NULL;
5129   struct iv_use *use = group->vuses[0];
5130   comp_cost sum_cost = no_cost, cost;
5131
5132   cost = get_computation_cost (data, use, cand, true,
5133                                &depends_on, &can_autoinc, &inv_expr);
5134
5135   sum_cost = cost;
5136   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5137     {
5138       if (can_autoinc)
5139         sum_cost -= cand->cost_step;
5140       /* If we generated the candidate solely for exploiting autoincrement
5141          opportunities, and it turns out it can't be used, set the cost to
5142          infinity to make sure we ignore it.  */
5143       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5144         sum_cost = infinite_cost;
5145     }
5146
5147   /* Uses in a group can share setup code, so only add setup cost once.
          NOTE(review): COST is reassigned in the loop below before it is
          read again, so this subtraction does not reach SUM_COST -- confirm
          the intent.  */
5148   cost -= cost.scratch;
5149   /* Compute and add costs for rest uses of this group.  */
5150   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5151     {
5152       struct iv_use *next = group->vuses[i];
5153
5154       /* TODO: We could skip computing cost for sub iv_use when it has the
5155          same cost as the first iv_use, but the cost really depends on the
5156          offset and where the iv_use is.  */
5157       cost = get_computation_cost (data, next, cand, true,
5158                                    NULL, &can_autoinc, NULL);
5159       sum_cost += cost;
5160     }
5161   set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5162                      NULL_TREE, ERROR_MARK, inv_expr);
5163
5164   return !sum_cost.infinite_cost_p ();
5165 }
5166
5167 /* Computes value of candidate CAND at position AT in iteration NITER, and
5168    stores it to VAL.  The value is the base of the candidate plus
        NITER times its step, plus one more step if AT is after the
        increment of CAND in LOOP.  */
5169
5170 static void
5171 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5172                aff_tree *val)
5173 {
5174   aff_tree step, delta, nit;
5175   struct iv *iv = cand->iv;
5176   tree type = TREE_TYPE (iv->base);
5177   tree steptype;
       /* The offset from the base is computed in sizetype for pointers and
          in the unsigned variant of TYPE otherwise.  */
5178   if (POINTER_TYPE_P (type))
5179     steptype = sizetype;
5180   else
5181     steptype = unsigned_type_for (type);
5182
       /* DELTA = NITER * STEP, plus STEP when AT follows the increment.  */
5183   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5184   aff_combination_convert (&step, steptype);
5185   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5186   aff_combination_convert (&nit, steptype);
5187   aff_combination_mult (&nit, &step, &delta);
5188   if (stmt_after_increment (loop, cand, at))
5189     aff_combination_add (&delta, &step);
5190
       /* VAL = BASE + DELTA.  A non-pointer base is converted to STEPTYPE
          first; a pointer base is kept in its own type.  */
5191   tree_to_aff_combination (iv->base, type, val);
5192   if (!POINTER_TYPE_P (type))
5193     aff_combination_convert (val, steptype);
5194   aff_combination_add (val, &delta);
5195 }
5196
5197 /* Returns period of induction variable IV.  The step of IV must be an
        INTEGER_CST; this is asserted.  The result is a mask of low bits in
        the unsigned variant of the type of the step.  */
5198
5199 static tree
5200 iv_period (struct iv *iv)
5201 {
5202   tree step = iv->step, period, type;
5203   tree pow2div;
5204
5205   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5206
5207   type = unsigned_type_for (TREE_TYPE (step));
5208   /* Period of the iv is lcm (step, type_range)/step -1,
5209      i.e., N*type_range/step - 1. Since type range is power
5210      of two, N == (step >> num_of_ending_zeros_binary (step)),
5211      so the final result is
5212
5213        (type_range >> num_of_ending_zeros_binary (step)) - 1
5214
5215   */
5216   pow2div = num_ending_zeros (step);
5217
       /* A mask of the low (precision - trailing zeros) bits is exactly
          the value given by the formula above.  */
5218   period = build_low_bits_mask (type,
5219                                 (TYPE_PRECISION (type)
5220                                  - tree_to_uhwi (pow2div)));
5221
5222   return period;
5223 }
5224
5225 /* Returns the comparison operator used when eliminating the iv USE.
        This is EQ_EXPR if the edge leaving DATA->current_loop from the block
        of USE is taken when the condition is true, and NE_EXPR otherwise.  */
5226
5227 static enum tree_code
5228 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5229 {
5230   struct loop *loop = data->current_loop;
5231   basic_block ex_bb;
5232   edge exit;
5233
       /* Pick the successor edge that leaves the loop: the first one, unless
          its destination is inside the loop, in which case the second.  */
5234   ex_bb = gimple_bb (use->stmt);
5235   exit = EDGE_SUCC (ex_bb, 0);
5236   if (flow_bb_inside_loop_p (loop, exit->dest))
5237     exit = EDGE_SUCC (ex_bb, 1);
5238
5239   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5240 }
5241
5242 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5243    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5244    calculation is performed in non-wrapping type.  DATA supplies the name
        expansion cache used for the affine expansions.
5245
5246    TODO: More generally, we could test for the situation that
5247          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5248          This would require knowing the sign of OFFSET.  */
5249
5250 static bool
5251 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5252 {
5253   enum tree_code code;
5254   tree e1, e2;
5255   aff_tree aff_e1, aff_e2, aff_offset;
5256
5257   if (!nowrap_type_p (TREE_TYPE (base)))
5258     return false;
5259
5260   base = expand_simple_operations (base);
5261
       /* Find the binary operation that computes BASE, either in the
          defining statement of an SSA name or in the expression itself.
          Anything that is not a binary operation is rejected.  */
5262   if (TREE_CODE (base) == SSA_NAME)
5263     {
5264       gimple *stmt = SSA_NAME_DEF_STMT (base);
5265
5266       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5267         return false;
5268
5269       code = gimple_assign_rhs_code (stmt);
5270       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5271         return false;
5272
5273       e1 = gimple_assign_rhs1 (stmt);
5274       e2 = gimple_assign_rhs2 (stmt);
5275     }
5276   else
5277     {
5278       code = TREE_CODE (base);
5279       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5280         return false;
5281       e1 = TREE_OPERAND (base, 0);
5282       e2 = TREE_OPERAND (base, 1);
5283     }
5284
5285   /* Use affine expansion as deeper inspection to prove the equality.
          AFF_OFFSET holds -OFFSET, so an operand equals OFFSET exactly when
          adding AFF_OFFSET to it gives zero.  */
5286   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5287                                   &aff_e2, &data->name_expansion_cache);
5288   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5289                                   &aff_offset, &data->name_expansion_cache);
5290   aff_combination_scale (&aff_offset, -1);
5291   switch (code)
5292     {
5293     case PLUS_EXPR:
           /* Either operand of the addition may be the one equal to OFFSET.  */
5294       aff_combination_add (&aff_e2, &aff_offset);
5295       if (aff_combination_zero_p (&aff_e2))
5296         return true;
5297
5298       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5299                                       &aff_e1, &data->name_expansion_cache);
5300       aff_combination_add (&aff_e1, &aff_offset);
5301       return aff_combination_zero_p (&aff_e1);
5302
5303     case POINTER_PLUS_EXPR:
           /* Only the second operand is compared with OFFSET here.  */
5304       aff_combination_add (&aff_e2, &aff_offset);
5305       return aff_combination_zero_p (&aff_e2);
5306
5307     default:
5308       return false;
5309     }
5310 }
5311
5312 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5313    comparison with CAND.  NITER describes the number of iterations of
5314    the loops.  If successful, the comparison in COMP_P is altered accordingly
        and true is returned.  Otherwise false is returned and *COMP_P is left
        unchanged.
5315
5316    We aim to handle the following situation:
5317
5318    sometype *base, *p;
5319    int a, b, i;
5320
5321    i = a;
5322    p = p_0 = base + a;
5323
5324    do
5325      {
5326        bla (*p);
5327        p++;
5328        i++;
5329      }
5330    while (i < b);
5331
5332    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5333    We aim to optimize this to
5334
5335    p = p_0 = base + a;
5336    do
5337      {
5338        bla (*p);
5339        p++;
5340      }
5341    while (p < p_0 - a + b);
5342
5343    This preserves the correctness, since the pointer arithmetic does not
5344    overflow.  More precisely:
5345
5346    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5347       overflow in computing it or the values of p.
5348    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5349       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5350
5351 static bool
5352 iv_elimination_compare_lt (struct ivopts_data *data,
5353                            struct iv_cand *cand, enum tree_code *comp_p,
5354                            struct tree_niter_desc *niter)
5355 {
5356   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5357   struct aff_tree nit, tmpa, tmpb;
5358   enum tree_code comp;
5359   HOST_WIDE_INT step;
5360
5361   /* We need to know that the candidate induction variable does not overflow.
5362      While more complex analysis may be used to prove this, for now just
5363      check that the variable appears in the original program and that it
5364      is computed in a type that guarantees no overflows.  */
5365   cand_type = TREE_TYPE (cand->iv->base);
5366   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5367     return false;
5368
5369   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5370      the calculation of the BOUND could overflow, making the comparison
5371      invalid.  */
5372   if (!data->loop_single_exit_p)
5373     return false;
5374
5375   /* We need to be able to decide whether candidate is increasing or decreasing
5376      in order to choose the right comparison operator.  */
5377   if (!cst_and_fits_in_hwi (cand->iv->step))
5378     return false;
5379   step = int_cst_value (cand->iv->step);
5380
5381   /* Check that the number of iterations matches the expected pattern:
5382      a + 1 > b ? 0 : b - a - 1.  */
5383   mbz = niter->may_be_zero;
5384   if (TREE_CODE (mbz) == GT_EXPR)
5385     {
5386       /* Handle a + 1 > b.  */
5387       tree op0 = TREE_OPERAND (mbz, 0);
5388       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5389         {
5390           a = TREE_OPERAND (op0, 0);
5391           b = TREE_OPERAND (mbz, 1);
5392         }
5393       else
5394         return false;
5395     }
5396   else if (TREE_CODE (mbz) == LT_EXPR)
5397     {
5398       tree op1 = TREE_OPERAND (mbz, 1);
5399
5400       /* Handle b < a + 1.  */
5401       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5402         {
5403           a = TREE_OPERAND (op1, 0);
5404           b = TREE_OPERAND (mbz, 0);
5405         }
5406       else
5407         return false;
5408     }
5409   else
5410     return false;
5411
5412   /* Expected number of iterations is B - A - 1.  Check that it matches
5413      the actual number, i.e., that B - A - NITER = 1.  The difference is
          accumulated in TMPB; it must have no variable part and a constant
          part of 1.  */
5414   tree_to_aff_combination (niter->niter, nit_type, &nit);
5415   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5416   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5417   aff_combination_scale (&nit, -1);
5418   aff_combination_scale (&tmpa, -1);
5419   aff_combination_add (&tmpb, &tmpa);
5420   aff_combination_add (&tmpb, &nit);
5421   if (tmpb.n != 0 || tmpb.offset != 1)
5422     return false;
5423
5424   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5425      overflow.  */
5426   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5427                         cand->iv->step,
5428                         fold_convert (TREE_TYPE (cand->iv->step), a));
5429   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5430     return false;
5431
5432   /* Determine the new comparison operator.  A decreasing candidate uses
          GT_EXPR, otherwise LT_EXPR is used.  NE_EXPR is replaced by that
          operator and EQ_EXPR by its inverse; no other incoming code is
          expected.  */
5433   comp = step < 0 ? GT_EXPR : LT_EXPR;
5434   if (*comp_p == NE_EXPR)
5435     *comp_p = comp;
5436   else if (*comp_p == EQ_EXPR)
5437     *comp_p = invert_tree_comparison (comp, false);
5438   else
5439     gcc_unreachable ();
5440
5441   return true;
5442 }
5443
5444 /* Check whether it is possible to express the condition in USE by comparison
5445    of candidate CAND.  If so, store the value compared with to BOUND, and the
5446    comparison operator to COMP.  Returns true if the elimination is possible.
        Note that *BOUND and *COMP may already have been written when false is
        returned.  */
5447
5448 static bool
5449 may_eliminate_iv (struct ivopts_data *data,
5450                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5451                   enum tree_code *comp)
5452 {
5453   basic_block ex_bb;
5454   edge exit;
5455   tree period;
5456   struct loop *loop = data->current_loop;
5457   aff_tree bnd;
5458   struct tree_niter_desc *desc = NULL;
5459
       /* The step of the candidate must be an integer constant.  */
5460   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5461     return false;
5462
5463   /* For now works only for exits that dominate the loop latch.
5464      TODO: extend to other conditions inside loop body.  */
5465   ex_bb = gimple_bb (use->stmt);
5466   if (use->stmt != last_stmt (ex_bb)
5467       || gimple_code (use->stmt) != GIMPLE_COND
5468       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5469     return false;
5470
       /* Find the successor edge that leaves the loop; give up if neither of
          the first two successors does.  */
5471   exit = EDGE_SUCC (ex_bb, 0);
5472   if (flow_bb_inside_loop_p (loop, exit->dest))
5473     exit = EDGE_SUCC (ex_bb, 1);
5474   if (flow_bb_inside_loop_p (loop, exit->dest))
5475     return false;
5476
5477   desc = niter_for_exit (data, exit);
5478   if (!desc)
5479     return false;
5480
5481   /* Determine whether we can use the variable to test the exit condition.
5482      This is the case iff the period of the induction variable is greater
5483      than the number of iterations for which the exit condition is true.  */
5484   period = iv_period (cand->iv);
5485
5486   /* If the number of iterations is constant, compare against it directly.  */
5487   if (TREE_CODE (desc->niter) == INTEGER_CST)
5488     {
5489       /* See cand_value_at.  When the use is after the increment, the number
              of iterations must be strictly less than the period; otherwise
              it may also be equal to it.  */
5490       if (stmt_after_increment (loop, cand, use->stmt))
5491         {
5492           if (!tree_int_cst_lt (desc->niter, period))
5493             return false;
5494         }
5495       else
5496         {
5497           if (tree_int_cst_lt (period, desc->niter))
5498             return false;
5499         }
5500     }
5501
5502   /* If not, and if this is the only possible exit of the loop, see whether
5503      we can get a conservative estimate on the number of iterations of the
5504      entire loop and compare against that instead.  */
5505   else
5506     {
5507       widest_int period_value, max_niter;
5508
5509       max_niter = desc->max;
5510       if (stmt_after_increment (loop, cand, use->stmt))
5511         max_niter += 1;
5512       period_value = wi::to_widest (period);
5513       if (wi::gtu_p (max_niter, period_value))
5514         {
5515           /* See if we can take advantage of inferred loop bound
5516              information.  */
5517           if (data->loop_single_exit_p)
5518             {
5519               if (!max_loop_iterations (loop, &max_niter))
5520                 return false;
5521               /* The loop bound is already adjusted by adding 1.  */
5522               if (wi::gtu_p (max_niter, period_value))
5523                 return false;
5524             }
5525           else
5526             return false;
5527         }
5528     }
5529
       /* The bound is the value of the candidate at the exit test after
          DESC->niter iterations, converted to the type of its base.  */
5530   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5531
5532   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5533                          aff_combination_to_tree (&bnd));
5534   *comp = iv_elimination_compare (data, use);
5535
5536   /* It is unlikely that computing the number of iterations using division
5537      would be more profitable than keeping the original induction variable.  */
5538   if (expression_expensive_p (*bound))
5539     return false;
5540
5541   /* Sometimes, it is possible to handle the situation that the number of
5542      iterations may be zero unless additional assumptions hold by using <
5543      instead of != in the exit condition.
5544
5545      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5546            base the exit condition on it.  However, that is often too
5547            expensive.  */
5548   if (!integer_zerop (desc->may_be_zero))
5549     return iv_elimination_compare_lt (data, cand, comp, desc);
5550
5551   return true;
5552 }
5553
5554 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5555    be copied, if it is used in the loop body and DATA->body_includes_call.
        Returns COSTS_N_INSNS (1) in that case and 0 otherwise.  */
5556
5557 static int
5558 parm_decl_cost (struct ivopts_data *data, tree bound)
5559 {
5560   tree sbound = bound;
5561   STRIP_NOPS (sbound);
5562
       /* The test is made on BOUND with NOPs stripped: it must be the default
          definition of an SSA name whose variable is a PARM_DECL.  */
5563   if (TREE_CODE (sbound) == SSA_NAME
5564       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5565       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5566       && data->body_includes_call)
5567     return COSTS_N_INSNS (1);
5568
5569   return 0;
5570 }
5571
5572 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5573
5574 static bool
5575 determine_group_iv_cost_cond (struct ivopts_data *data,
5576                               struct iv_group *group, struct iv_cand *cand)
5577 {
5578   tree bound = NULL_TREE;
5579   struct iv *cmp_iv;
5580   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5581   comp_cost elim_cost, express_cost, cost, bound_cost;
5582   bool ok;
5583   iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5584   tree *control_var, *bound_cst;
5585   enum tree_code comp = ERROR_MARK;
5586   struct iv_use *use = group->vuses[0];
5587
5588   gcc_assert (cand->iv);
5589
5590   /* Try iv elimination.  */
5591   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5592     {
5593       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5594       if (elim_cost.cost == 0)
5595         elim_cost.cost = parm_decl_cost (data, bound);
5596       else if (TREE_CODE (bound) == INTEGER_CST)
5597         elim_cost.cost = 0;
5598       /* If we replace a loop condition 'i < n' with 'p < base + n',
5599          depends_on_elim will have 'base' and 'n' set, which implies
5600          that both 'base' and 'n' will be live during the loop.  More likely,
5601          'base + n' will be loop invariant, resulting in only one live value
5602          during the loop.  So in that case we clear depends_on_elim and set
5603         elim_inv_expr_id instead.  */
5604       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5605         {
5606           elim_inv_expr = record_inv_expr (data, bound);
5607           bitmap_clear (depends_on_elim);
5608         }
5609       /* The bound is a loop invariant, so it will be only computed
5610          once.  */
5611       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5612     }
5613   else
5614     elim_cost = infinite_cost;
5615
5616   /* Try expressing the original giv.  If it is compared with an invariant,
5617      note that we cannot get rid of it.  */
5618   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5619                               NULL, &cmp_iv);
5620   gcc_assert (ok);
5621
5622   /* When the condition is a comparison of the candidate IV against
5623      zero, prefer this IV.
5624
5625      TODO: The constant that we're subtracting from the cost should
5626      be target-dependent.  This information should be added to the
5627      target costs for each backend.  */
5628   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5629       && integer_zerop (*bound_cst)
5630       && (operand_equal_p (*control_var, cand->var_after, 0)
5631           || operand_equal_p (*control_var, cand->var_before, 0)))
5632     elim_cost -= 1;
5633
5634   express_cost = get_computation_cost (data, use, cand, false,
5635                                        &depends_on_express, NULL,
5636                                        &express_inv_expr);
5637   fd_ivopts_data = data;
5638   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5639
5640   /* Count the cost of the original bound as well.  */
5641   bound_cost = force_var_cost (data, *bound_cst, NULL);
5642   if (bound_cost.cost == 0)
5643     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5644   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5645     bound_cost.cost = 0;
5646   express_cost += bound_cost;
5647
5648   /* Choose the better approach, preferring the eliminated IV. */
5649   if (elim_cost <= express_cost)
5650     {
5651       cost = elim_cost;
5652       depends_on = depends_on_elim;
5653       depends_on_elim = NULL;
5654       inv_expr = elim_inv_expr;
5655     }
5656   else
5657     {
5658       cost = express_cost;
5659       depends_on = depends_on_express;
5660       depends_on_express = NULL;
5661       bound = NULL_TREE;
5662       comp = ERROR_MARK;
5663       inv_expr = express_inv_expr;
5664     }
5665
5666   set_group_iv_cost (data, group, cand, cost,
5667                      depends_on, bound, comp, inv_expr);
5668
5669   if (depends_on_elim)
5670     BITMAP_FREE (depends_on_elim);
5671   if (depends_on_express)
5672     BITMAP_FREE (depends_on_express);
5673
5674   return !cost.infinite_cost_p ();
5675 }
5676
5677 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5678    if USE cannot be represented with CAND.  */
5679
5680 static bool
5681 determine_group_iv_cost (struct ivopts_data *data,
5682                          struct iv_group *group, struct iv_cand *cand)
5683 {
5684   switch (group->type)
5685     {
5686     case USE_NONLINEAR_EXPR:
5687       return determine_group_iv_cost_generic (data, group, cand);
5688
5689     case USE_ADDRESS:
5690       return determine_group_iv_cost_address (data, group, cand);
5691
5692     case USE_COMPARE:
5693       return determine_group_iv_cost_cond (data, group, cand);
5694
5695     default:
5696       gcc_unreachable ();
5697     }
5698 }
5699
5700 /* Return true if get_computation_cost indicates that autoincrement is
5701    a possibility for the pair of USE and CAND, false otherwise.  */
5702
5703 static bool
5704 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5705                            struct iv_cand *cand)
5706 {
5707   bitmap depends_on;
5708   bool can_autoinc;
5709   comp_cost cost;
5710
5711   if (use->type != USE_ADDRESS)
5712     return false;
5713
5714   cost = get_computation_cost (data, use, cand, true, &depends_on,
5715                                &can_autoinc, NULL);
5716
5717   BITMAP_FREE (depends_on);
5718
5719   return !cost.infinite_cost_p () && can_autoinc;
5720 }
5721
5722 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5723    use that allows autoincrement, and set their AINC_USE if possible.  */
5724
5725 static void
5726 set_autoinc_for_original_candidates (struct ivopts_data *data)
5727 {
5728   unsigned i, j;
5729
5730   for (i = 0; i < data->vcands.length (); i++)
5731     {
5732       struct iv_cand *cand = data->vcands[i];
5733       struct iv_use *closest_before = NULL;
5734       struct iv_use *closest_after = NULL;
5735       if (cand->pos != IP_ORIGINAL)
5736         continue;
5737
5738       for (j = 0; j < data->vgroups.length (); j++)
5739         {
5740           struct iv_group *group = data->vgroups[j];
5741           struct iv_use *use = group->vuses[0];
5742           unsigned uid = gimple_uid (use->stmt);
5743
5744           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5745             continue;
5746
5747           if (uid < gimple_uid (cand->incremented_at)
5748               && (closest_before == NULL
5749                   || uid > gimple_uid (closest_before->stmt)))
5750             closest_before = use;
5751
5752           if (uid > gimple_uid (cand->incremented_at)
5753               && (closest_after == NULL
5754                   || uid < gimple_uid (closest_after->stmt)))
5755             closest_after = use;
5756         }
5757
5758       if (closest_before != NULL
5759           && autoinc_possible_for_pair (data, closest_before, cand))
5760         cand->ainc_use = closest_before;
5761       else if (closest_after != NULL
5762                && autoinc_possible_for_pair (data, closest_after, cand))
5763         cand->ainc_use = closest_after;
5764     }
5765 }
5766
5767 /* Finds the candidates for the induction variables.  */
5768
5769 static void
5770 find_iv_candidates (struct ivopts_data *data)
5771 {
5772   /* Add commonly used ivs.  */
5773   add_standard_iv_candidates (data);
5774
5775   /* Add old induction variables.  */
5776   add_iv_candidate_for_bivs (data);
5777
5778   /* Add induction variables derived from uses.  */
5779   add_iv_candidate_for_groups (data);
5780
5781   set_autoinc_for_original_candidates (data);
5782
5783   /* Record the important candidates.  */
5784   record_important_candidates (data);
5785
5786   if (dump_file && (dump_flags & TDF_DETAILS))
5787     {
5788       unsigned i;
5789
5790       fprintf (dump_file, "\n<Important Candidates>:\t");
5791       for (i = 0; i < data->vcands.length (); i++)
5792         if (data->vcands[i]->important)
5793           fprintf (dump_file, " %d,", data->vcands[i]->id);
5794       fprintf (dump_file, "\n");
5795
5796       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5797       for (i = 0; i < data->vgroups.length (); i++)
5798         {
5799           struct iv_group *group = data->vgroups[i];
5800
5801           if (group->related_cands)
5802             {
5803               fprintf (dump_file, "  Group %d:\t", group->id);
5804               dump_bitmap (dump_file, group->related_cands);
5805             }
5806         }
5807       fprintf (dump_file, "\n");
5808     }
5809 }
5810
5811 /* Determines costs of computing use of iv with an iv candidate.  */
5812
5813 static void
5814 determine_group_iv_costs (struct ivopts_data *data)
5815 {
5816   unsigned i, j;
5817   struct iv_cand *cand;
5818   struct iv_group *group;
5819   bitmap to_clear = BITMAP_ALLOC (NULL);
5820
5821   alloc_use_cost_map (data);
5822
5823   for (i = 0; i < data->vgroups.length (); i++)
5824     {
5825       group = data->vgroups[i];
5826
5827       if (data->consider_all_candidates)
5828         {
5829           for (j = 0; j < data->vcands.length (); j++)
5830             {
5831               cand = data->vcands[j];
5832               determine_group_iv_cost (data, group, cand);
5833             }
5834         }
5835       else
5836         {
5837           bitmap_iterator bi;
5838
5839           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5840             {
5841               cand = data->vcands[j];
5842               if (!determine_group_iv_cost (data, group, cand))
5843                 bitmap_set_bit (to_clear, j);
5844             }
5845
5846           /* Remove the candidates for that the cost is infinite from
5847              the list of related candidates.  */
5848           bitmap_and_compl_into (group->related_cands, to_clear);
5849           bitmap_clear (to_clear);
5850         }
5851     }
5852
5853   BITMAP_FREE (to_clear);
5854
5855   if (dump_file && (dump_flags & TDF_DETAILS))
5856     {
5857       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5858       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5859
5860       for (hash_table<iv_inv_expr_hasher>::iterator it
5861            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5862            ++it)
5863         list.safe_push (*it);
5864
5865       list.qsort (sort_iv_inv_expr_ent);
5866
5867       for (i = 0; i < list.length (); ++i)
5868         {
5869           fprintf (dump_file, "inv_expr %d: \t", i);
5870           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5871           fprintf (dump_file, "\n");
5872         }
5873
5874       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5875
5876       for (i = 0; i < data->vgroups.length (); i++)
5877         {
5878           group = data->vgroups[i];
5879
5880           fprintf (dump_file, "Group %d:\n", i);
5881           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5882           for (j = 0; j < group->n_map_members; j++)
5883             {
5884               if (!group->cost_map[j].cand
5885                   || group->cost_map[j].cost.infinite_cost_p ())
5886                 continue;
5887
5888               fprintf (dump_file, "  %d\t%d\t%d\t",
5889                        group->cost_map[j].cand->id,
5890                        group->cost_map[j].cost.cost,
5891                        group->cost_map[j].cost.complexity);
5892               if (group->cost_map[j].inv_expr != NULL)
5893                 fprintf (dump_file, "%d\t",
5894                          group->cost_map[j].inv_expr->id);
5895               else
5896                 fprintf (dump_file, "\t");
5897               if (group->cost_map[j].depends_on)
5898                 bitmap_print (dump_file,
5899                               group->cost_map[j].depends_on, "","");
5900               fprintf (dump_file, "\n");
5901             }
5902
5903           fprintf (dump_file, "\n");
5904         }
5905       fprintf (dump_file, "\n");
5906     }
5907 }
5908
5909 /* Determines cost of the candidate CAND.  */
5910
5911 static void
5912 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5913 {
5914   comp_cost cost_base;
5915   unsigned cost, cost_step;
5916   tree base;
5917
5918   if (!cand->iv)
5919     {
5920       cand->cost = 0;
5921       return;
5922     }
5923
5924   /* There are two costs associated with the candidate -- its increment
5925      and its initialization.  The second is almost negligible for any loop
5926      that rolls enough, so we take it just very little into account.  */
5927
5928   base = cand->iv->base;
5929   cost_base = force_var_cost (data, base, NULL);
5930   /* It will be exceptional that the iv register happens to be initialized with
5931      the proper value at no cost.  In general, there will at least be a regcopy
5932      or a const set.  */
5933   if (cost_base.cost == 0)
5934     cost_base.cost = COSTS_N_INSNS (1);
5935   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5936
5937   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5938
5939   /* Prefer the original ivs unless we may gain something by replacing it.
5940      The reason is to make debugging simpler; so this is not relevant for
5941      artificial ivs created by other optimization passes.  */
5942   if (cand->pos != IP_ORIGINAL
5943       || !SSA_NAME_VAR (cand->var_before)
5944       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5945     cost++;
5946
5947   /* Prefer not to insert statements into latch unless there are some
5948      already (so that we do not create unnecessary jumps).  */
5949   if (cand->pos == IP_END
5950       && empty_block_p (ip_end_pos (data->current_loop)))
5951     cost++;
5952
5953   cand->cost = cost;
5954   cand->cost_step = cost_step;
5955 }
5956
5957 /* Determines costs of computation of the candidates.  */
5958
5959 static void
5960 determine_iv_costs (struct ivopts_data *data)
5961 {
5962   unsigned i;
5963
5964   if (dump_file && (dump_flags & TDF_DETAILS))
5965     {
5966       fprintf (dump_file, "<Candidate Costs>:\n");
5967       fprintf (dump_file, "  cand\tcost\n");
5968     }
5969
5970   for (i = 0; i < data->vcands.length (); i++)
5971     {
5972       struct iv_cand *cand = data->vcands[i];
5973
5974       determine_iv_cost (data, cand);
5975
5976       if (dump_file && (dump_flags & TDF_DETAILS))
5977         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5978     }
5979
5980   if (dump_file && (dump_flags & TDF_DETAILS))
5981     fprintf (dump_file, "\n");
5982 }
5983
5984 /* Calculates cost for having SIZE induction variables.  */
5985
5986 static unsigned
5987 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5988 {
5989   /* We add size to the cost, so that we prefer eliminating ivs
5990      if possible.  */
5991   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5992                                             data->body_includes_call);
5993 }
5994
5995 /* For each size of the induction variable set determine the penalty.  */
5996
5997 static void
5998 determine_set_costs (struct ivopts_data *data)
5999 {
6000   unsigned j, n;
6001   gphi *phi;
6002   gphi_iterator psi;
6003   tree op;
6004   struct loop *loop = data->current_loop;
6005   bitmap_iterator bi;
6006
6007   if (dump_file && (dump_flags & TDF_DETAILS))
6008     {
6009       fprintf (dump_file, "<Global Costs>:\n");
6010       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6011       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6012       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6013       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6014     }
6015
6016   n = 0;
6017   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6018     {
6019       phi = psi.phi ();
6020       op = PHI_RESULT (phi);
6021
6022       if (virtual_operand_p (op))
6023         continue;
6024
6025       if (get_iv (data, op))
6026         continue;
6027
6028       n++;
6029     }
6030
6031   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6032     {
6033       struct version_info *info = ver_info (data, j);
6034
6035       if (info->inv_id && info->has_nonlin_use)
6036         n++;
6037     }
6038
6039   data->regs_used = n;
6040   if (dump_file && (dump_flags & TDF_DETAILS))
6041     fprintf (dump_file, "  regs_used %d\n", n);
6042
6043   if (dump_file && (dump_flags & TDF_DETAILS))
6044     {
6045       fprintf (dump_file, "  cost for size:\n");
6046       fprintf (dump_file, "  ivs\tcost\n");
6047       for (j = 0; j <= 2 * target_avail_regs; j++)
6048         fprintf (dump_file, "  %d\t%d\n", j,
6049                  ivopts_global_cost_for_size (data, j));
6050       fprintf (dump_file, "\n");
6051     }
6052 }
6053
6054 /* Returns true if A is a cheaper cost pair than B.  */
6055
6056 static bool
6057 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6058 {
6059   if (!a)
6060     return false;
6061
6062   if (!b)
6063     return true;
6064
6065   if (a->cost < b->cost)
6066     return true;
6067
6068   if (b->cost < a->cost)
6069     return false;
6070
6071   /* In case the costs are the same, prefer the cheaper candidate.  */
6072   if (a->cand->cost < b->cand->cost)
6073     return true;
6074
6075   return false;
6076 }
6077
6078
6079 /* Returns candidate by that USE is expressed in IVS.  */
6080
6081 static struct cost_pair *
6082 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6083 {
6084   return ivs->cand_for_group[group->id];
6085 }
6086
6087 /* Computes the cost field of IVS structure.  */
6088
6089 static void
6090 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6091 {
6092   comp_cost cost = ivs->cand_use_cost;
6093
6094   cost += ivs->cand_cost;
6095
6096   cost += ivopts_global_cost_for_size (data,
6097                                        ivs->n_regs
6098                                        + ivs->used_inv_exprs->elements ());
6099
6100   ivs->cost = cost;
6101 }
6102
6103 /* Remove invariants in set INVS to set IVS.  */
6104
6105 static void
6106 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6107 {
6108   bitmap_iterator bi;
6109   unsigned iid;
6110
6111   if (!invs)
6112     return;
6113
6114   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6115     {
6116       ivs->n_invariant_uses[iid]--;
6117       if (ivs->n_invariant_uses[iid] == 0)
6118         ivs->n_regs--;
6119     }
6120 }
6121
6122 /* Set USE not to be expressed by any candidate in IVS.  */
6123
6124 static void
6125 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6126                  struct iv_group *group)
6127 {
6128   unsigned gid = group->id, cid;
6129   struct cost_pair *cp;
6130
6131   cp = ivs->cand_for_group[gid];
6132   if (!cp)
6133     return;
6134   cid = cp->cand->id;
6135
6136   ivs->bad_groups++;
6137   ivs->cand_for_group[gid] = NULL;
6138   ivs->n_cand_uses[cid]--;
6139
6140   if (ivs->n_cand_uses[cid] == 0)
6141     {
6142       bitmap_clear_bit (ivs->cands, cid);
6143       /* Do not count the pseudocandidates.  */
6144       if (cp->cand->iv)
6145         ivs->n_regs--;
6146       ivs->n_cands--;
6147       ivs->cand_cost -= cp->cand->cost;
6148
6149       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6150     }
6151
6152   ivs->cand_use_cost -= cp->cost;
6153
6154   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6155
6156   if (cp->inv_expr != NULL)
6157     {
6158       unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6159       --(*slot);
6160       if (*slot == 0)
6161         ivs->used_inv_exprs->remove (cp->inv_expr);
6162     }
6163   iv_ca_recount_cost (data, ivs);
6164 }
6165
6166 /* Add invariants in set INVS to set IVS.  */
6167
6168 static void
6169 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6170 {
6171   bitmap_iterator bi;
6172   unsigned iid;
6173
6174   if (!invs)
6175     return;
6176
6177   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6178     {
6179       ivs->n_invariant_uses[iid]++;
6180       if (ivs->n_invariant_uses[iid] == 1)
6181         ivs->n_regs++;
6182     }
6183 }
6184
6185 /* Set cost pair for GROUP in set IVS to CP.  */
6186
6187 static void
6188 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6189               struct iv_group *group, struct cost_pair *cp)
6190 {
6191   unsigned gid = group->id, cid;
6192
6193   if (ivs->cand_for_group[gid] == cp)
6194     return;
6195
6196   if (ivs->cand_for_group[gid])
6197     iv_ca_set_no_cp (data, ivs, group);
6198
6199   if (cp)
6200     {
6201       cid = cp->cand->id;
6202
6203       ivs->bad_groups--;
6204       ivs->cand_for_group[gid] = cp;
6205       ivs->n_cand_uses[cid]++;
6206       if (ivs->n_cand_uses[cid] == 1)
6207         {
6208           bitmap_set_bit (ivs->cands, cid);
6209           /* Do not count the pseudocandidates.  */
6210           if (cp->cand->iv)
6211             ivs->n_regs++;
6212           ivs->n_cands++;
6213           ivs->cand_cost += cp->cand->cost;
6214
6215           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6216         }
6217
6218       ivs->cand_use_cost += cp->cost;
6219       iv_ca_set_add_invariants (ivs, cp->depends_on);
6220
6221       if (cp->inv_expr != NULL)
6222         {
6223           unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6224           ++(*slot);
6225         }
6226       iv_ca_recount_cost (data, ivs);
6227     }
6228 }
6229
6230 /* Extend set IVS by expressing USE by some of the candidates in it
6231    if possible.  Consider all important candidates if candidates in
6232    set IVS don't give any result.  */
6233
6234 static void
6235 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6236                struct iv_group *group)
6237 {
6238   struct cost_pair *best_cp = NULL, *cp;
6239   bitmap_iterator bi;
6240   unsigned i;
6241   struct iv_cand *cand;
6242
6243   gcc_assert (ivs->upto >= group->id);
6244   ivs->upto++;
6245   ivs->bad_groups++;
6246
6247   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6248     {
6249       cand = data->vcands[i];
6250       cp = get_group_iv_cost (data, group, cand);
6251       if (cheaper_cost_pair (cp, best_cp))
6252         best_cp = cp;
6253     }
6254
6255   if (best_cp == NULL)
6256     {
6257       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6258         {
6259           cand = data->vcands[i];
6260           cp = get_group_iv_cost (data, group, cand);
6261           if (cheaper_cost_pair (cp, best_cp))
6262             best_cp = cp;
6263         }
6264     }
6265
6266   iv_ca_set_cp (data, ivs, group, best_cp);
6267 }
6268
6269 /* Get cost for assignment IVS.  */
6270
6271 static comp_cost
6272 iv_ca_cost (struct iv_ca *ivs)
6273 {
6274   /* This was a conditional expression but it triggered a bug in
6275      Sun C 5.5.  */
6276   if (ivs->bad_groups)
6277     return infinite_cost;
6278   else
6279     return ivs->cost;
6280 }
6281
6282 /* Returns true if all dependences of CP are among invariants in IVS.  */
6283
6284 static bool
6285 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6286 {
6287   unsigned i;
6288   bitmap_iterator bi;
6289
6290   if (!cp->depends_on)
6291     return true;
6292
6293   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6294     {
6295       if (ivs->n_invariant_uses[i] == 0)
6296         return false;
6297     }
6298
6299   return true;
6300 }
6301
6302 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6303    it before NEXT.  */
6304
6305 static struct iv_ca_delta *
6306 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6307                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6308 {
6309   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6310
6311   change->group = group;
6312   change->old_cp = old_cp;
6313   change->new_cp = new_cp;
6314   change->next = next;
6315
6316   return change;
6317 }
6318
6319 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6320    are rewritten.  */
6321
6322 static struct iv_ca_delta *
6323 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6324 {
6325   struct iv_ca_delta *last;
6326
6327   if (!l2)
6328     return l1;
6329
6330   if (!l1)
6331     return l2;
6332
6333   for (last = l1; last->next; last = last->next)
6334     continue;
6335   last->next = l2;
6336
6337   return l1;
6338 }
6339
6340 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6341
6342 static struct iv_ca_delta *
6343 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6344 {
6345   struct iv_ca_delta *act, *next, *prev = NULL;
6346
6347   for (act = delta; act; act = next)
6348     {
6349       next = act->next;
6350       act->next = prev;
6351       prev = act;
6352
6353       std::swap (act->old_cp, act->new_cp);
6354     }
6355
6356   return prev;
6357 }
6358
6359 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6360    reverted instead.  */
6361
6362 static void
6363 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6364                     struct iv_ca_delta *delta, bool forward)
6365 {
6366   struct cost_pair *from, *to;
6367   struct iv_ca_delta *act;
6368
6369   if (!forward)
6370     delta = iv_ca_delta_reverse (delta);
6371
6372   for (act = delta; act; act = act->next)
6373     {
6374       from = act->old_cp;
6375       to = act->new_cp;
6376       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6377       iv_ca_set_cp (data, ivs, act->group, to);
6378     }
6379
6380   if (!forward)
6381     iv_ca_delta_reverse (delta);
6382 }
6383
6384 /* Returns true if CAND is used in IVS.  */
6385
6386 static bool
6387 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6388 {
6389   return ivs->n_cand_uses[cand->id] > 0;
6390 }
6391
6392 /* Returns number of induction variable candidates in the set IVS.  */
6393
6394 static unsigned
6395 iv_ca_n_cands (struct iv_ca *ivs)
6396 {
6397   return ivs->n_cands;
6398 }
6399
6400 /* Free the list of changes DELTA.  */
6401
6402 static void
6403 iv_ca_delta_free (struct iv_ca_delta **delta)
6404 {
6405   struct iv_ca_delta *act, *next;
6406
6407   for (act = *delta; act; act = next)
6408     {
6409       next = act->next;
6410       free (act);
6411     }
6412
6413   *delta = NULL;
6414 }
6415
6416 /* Allocates new iv candidates assignment.  */
6417
6418 static struct iv_ca *
6419 iv_ca_new (struct ivopts_data *data)
6420 {
6421   struct iv_ca *nw = XNEW (struct iv_ca);
6422
6423   nw->upto = 0;
6424   nw->bad_groups = 0;
6425   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6426                                  data->vgroups.length ());
6427   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6428   nw->cands = BITMAP_ALLOC (NULL);
6429   nw->n_cands = 0;
6430   nw->n_regs = 0;
6431   nw->cand_use_cost = no_cost;
6432   nw->cand_cost = 0;
6433   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6434   nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6435   nw->cost = no_cost;
6436
6437   return nw;
6438 }
6439
6440 /* Free memory occupied by the set IVS.  */
6441
6442 static void
6443 iv_ca_free (struct iv_ca **ivs)
6444 {
6445   free ((*ivs)->cand_for_group);
6446   free ((*ivs)->n_cand_uses);
6447   BITMAP_FREE ((*ivs)->cands);
6448   free ((*ivs)->n_invariant_uses);
6449   delete ((*ivs)->used_inv_exprs);
6450   free (*ivs);
6451   *ivs = NULL;
6452 }
6453
6454 /* Dumps IVS to FILE.  */
6455
6456 static void
6457 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6458 {
6459   unsigned i;
6460   comp_cost cost = iv_ca_cost (ivs);
6461
6462   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6463            cost.complexity);
6464   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6465            ivs->cand_cost, ivs->cand_use_cost.cost,
6466            ivs->cand_use_cost.complexity);
6467   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6468
6469   for (i = 0; i < ivs->upto; i++)
6470     {
6471       struct iv_group *group = data->vgroups[i];
6472       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6473       if (cp)
6474         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6475                  group->id, cp->cand->id, cp->cost.cost,
6476                  cp->cost.complexity);
6477       else
6478         fprintf (file, "   group:%d --> ??\n", group->id);
6479     }
6480
6481   const char *pref = "";
6482   fprintf (file, "  invariant variables: ");
6483   for (i = 1; i <= data->max_inv_id; i++)
6484     if (ivs->n_invariant_uses[i])
6485       {
6486         fprintf (file, "%s%d", pref, i);
6487         pref = ", ";
6488       }
6489
6490   pref = "";
6491   fprintf (file, "\n  invariant expressions: ");
6492   for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6493        = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6494     {
6495         fprintf (file, "%s%d", pref, (*it).first->id);
6496         pref = ", ";
6497     }
6498
6499   fprintf (file, "\n\n");
6500 }
6501
6502 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6503    new set, and store differences in DELTA.  Number of induction variables
6504    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6505    the function will try to find a solution with mimimal iv candidates.  */
6506
6507 static comp_cost
6508 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6509               struct iv_cand *cand, struct iv_ca_delta **delta,
6510               unsigned *n_ivs, bool min_ncand)
6511 {
6512   unsigned i;
6513   comp_cost cost;
6514   struct iv_group *group;
6515   struct cost_pair *old_cp, *new_cp;
6516
6517   *delta = NULL;
6518   for (i = 0; i < ivs->upto; i++)
6519     {
6520       group = data->vgroups[i];
6521       old_cp = iv_ca_cand_for_group (ivs, group);
6522
6523       if (old_cp
6524           && old_cp->cand == cand)
6525         continue;
6526
6527       new_cp = get_group_iv_cost (data, group, cand);
6528       if (!new_cp)
6529         continue;
6530
6531       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6532         continue;
6533
6534       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6535         continue;
6536
6537       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6538     }
6539
6540   iv_ca_delta_commit (data, ivs, *delta, true);
6541   cost = iv_ca_cost (ivs);
6542   if (n_ivs)
6543     *n_ivs = iv_ca_n_cands (ivs);
6544   iv_ca_delta_commit (data, ivs, *delta, false);
6545
6546   return cost;
6547 }
6548
6549 /* Try narrowing set IVS by removing CAND.  Return the cost of
6550    the new set and store the differences in DELTA.  START is
6551    the candidate with which we start narrowing.  */
6552
6553 static comp_cost
6554 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6555               struct iv_cand *cand, struct iv_cand *start,
6556               struct iv_ca_delta **delta)
6557 {
6558   unsigned i, ci;
6559   struct iv_group *group;
6560   struct cost_pair *old_cp, *new_cp, *cp;
6561   bitmap_iterator bi;
6562   struct iv_cand *cnd;
6563   comp_cost cost, best_cost, acost;
6564
6565   *delta = NULL;
6566   for (i = 0; i < data->vgroups.length (); i++)
6567     {
6568       group = data->vgroups[i];
6569
6570       old_cp = iv_ca_cand_for_group (ivs, group);
6571       if (old_cp->cand != cand)
6572         continue;
6573
6574       best_cost = iv_ca_cost (ivs);
6575       /* Start narrowing with START.  */
6576       new_cp = get_group_iv_cost (data, group, start);
6577
6578       if (data->consider_all_candidates)
6579         {
6580           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6581             {
6582               if (ci == cand->id || (start && ci == start->id))
6583                 continue;
6584
6585               cnd = data->vcands[ci];
6586
6587               cp = get_group_iv_cost (data, group, cnd);
6588               if (!cp)
6589                 continue;
6590
6591               iv_ca_set_cp (data, ivs, group, cp);
6592               acost = iv_ca_cost (ivs);
6593
6594               if (acost < best_cost)
6595                 {
6596                   best_cost = acost;
6597                   new_cp = cp;
6598                 }
6599             }
6600         }
6601       else
6602         {
6603           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6604             {
6605               if (ci == cand->id || (start && ci == start->id))
6606                 continue;
6607
6608               cnd = data->vcands[ci];
6609
6610               cp = get_group_iv_cost (data, group, cnd);
6611               if (!cp)
6612                 continue;
6613
6614               iv_ca_set_cp (data, ivs, group, cp);
6615               acost = iv_ca_cost (ivs);
6616
6617               if (acost < best_cost)
6618                 {
6619                   best_cost = acost;
6620                   new_cp = cp;
6621                 }
6622             }
6623         }
6624       /* Restore to old cp for use.  */
6625       iv_ca_set_cp (data, ivs, group, old_cp);
6626
6627       if (!new_cp)
6628         {
6629           iv_ca_delta_free (delta);
6630           return infinite_cost;
6631         }
6632
6633       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6634     }
6635
6636   iv_ca_delta_commit (data, ivs, *delta, true);
6637   cost = iv_ca_cost (ivs);
6638   iv_ca_delta_commit (data, ivs, *delta, false);
6639
6640   return cost;
6641 }
6642
6643 /* Try optimizing the set of candidates IVS by removing candidates different
6644    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6645    differences in DELTA.  */
6646
6647 static comp_cost
6648 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6649              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6650 {
6651   bitmap_iterator bi;
6652   struct iv_ca_delta *act_delta, *best_delta;
6653   unsigned i;
6654   comp_cost best_cost, acost;
6655   struct iv_cand *cand;
6656
6657   best_delta = NULL;
6658   best_cost = iv_ca_cost (ivs);
6659
6660   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6661     {
6662       cand = data->vcands[i];
6663
6664       if (cand == except_cand)
6665         continue;
6666
6667       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6668
6669       if (acost < best_cost)
6670         {
6671           best_cost = acost;
6672           iv_ca_delta_free (&best_delta);
6673           best_delta = act_delta;
6674         }
6675       else
6676         iv_ca_delta_free (&act_delta);
6677     }
6678
6679   if (!best_delta)
6680     {
6681       *delta = NULL;
6682       return best_cost;
6683     }
6684
6685   /* Recurse to possibly remove other unnecessary ivs.  */
6686   iv_ca_delta_commit (data, ivs, best_delta, true);
6687   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6688   iv_ca_delta_commit (data, ivs, best_delta, false);
6689   *delta = iv_ca_delta_join (best_delta, *delta);
6690   return best_cost;
6691 }
6692
6693 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6694    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6695    the corresponding cost_pair, otherwise just return BEST_CP.  */
6696
6697 static struct cost_pair*
6698 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6699                         unsigned int cand_idx, struct iv_cand *old_cand,
6700                         struct cost_pair *best_cp)
6701 {
6702   struct iv_cand *cand;
6703   struct cost_pair *cp;
6704
6705   gcc_assert (old_cand != NULL && best_cp != NULL);
6706   if (cand_idx == old_cand->id)
6707     return best_cp;
6708
6709   cand = data->vcands[cand_idx];
6710   cp = get_group_iv_cost (data, group, cand);
6711   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6712     return cp;
6713
6714   return best_cp;
6715 }
6716
6717 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6718    which are used by more than one iv uses.  For each of those candidates,
6719    this function tries to represent iv uses under that candidate using
6720    other ones with lower local cost, then tries to prune the new set.
6721    If the new set has lower cost, It returns the new cost after recording
6722    candidate replacement in list DELTA.  */
6723
6724 static comp_cost
6725 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6726                struct iv_ca_delta **delta)
6727 {
6728   bitmap_iterator bi, bj;
6729   unsigned int i, j, k;
6730   struct iv_cand *cand;
6731   comp_cost orig_cost, acost;
6732   struct iv_ca_delta *act_delta, *tmp_delta;
6733   struct cost_pair *old_cp, *best_cp = NULL;
6734
6735   *delta = NULL;
6736   orig_cost = iv_ca_cost (ivs);
6737
6738   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6739     {
6740       if (ivs->n_cand_uses[i] == 1
6741           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6742         continue;
6743
6744       cand = data->vcands[i];
6745
6746       act_delta = NULL;
6747       /*  Represent uses under current candidate using other ones with
6748           lower local cost.  */
6749       for (j = 0; j < ivs->upto; j++)
6750         {
6751           struct iv_group *group = data->vgroups[j];
6752           old_cp = iv_ca_cand_for_group (ivs, group);
6753
6754           if (old_cp->cand != cand)
6755             continue;
6756
6757           best_cp = old_cp;
6758           if (data->consider_all_candidates)
6759             for (k = 0; k < data->vcands.length (); k++)
6760               best_cp = cheaper_cost_with_cand (data, group, k,
6761                                                 old_cp->cand, best_cp);
6762           else
6763             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6764               best_cp = cheaper_cost_with_cand (data, group, k,
6765                                                 old_cp->cand, best_cp);
6766
6767           if (best_cp == old_cp)
6768             continue;
6769
6770           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6771         }
6772       /* No need for further prune.  */
6773       if (!act_delta)
6774         continue;
6775
6776       /* Prune the new candidate set.  */
6777       iv_ca_delta_commit (data, ivs, act_delta, true);
6778       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6779       iv_ca_delta_commit (data, ivs, act_delta, false);
6780       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6781
6782       if (acost < orig_cost)
6783         {
6784           *delta = act_delta;
6785           return acost;
6786         }
6787       else
6788         iv_ca_delta_free (&act_delta);
6789     }
6790
6791   return orig_cost;
6792 }
6793
6794 /* Tries to extend the sets IVS in the best possible way in order to
6795    express the GROUP.  If ORIGINALP is true, prefer candidates from
6796    the original set of IVs, otherwise favor important candidates not
6797    based on any memory object.  */
6798
6799 static bool
6800 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6801                   struct iv_group *group, bool originalp)
6802 {
6803   comp_cost best_cost, act_cost;
6804   unsigned i;
6805   bitmap_iterator bi;
6806   struct iv_cand *cand;
6807   struct iv_ca_delta *best_delta = NULL, *act_delta;
6808   struct cost_pair *cp;
6809
6810   iv_ca_add_group (data, ivs, group);
6811   best_cost = iv_ca_cost (ivs);
6812   cp = iv_ca_cand_for_group (ivs, group);
6813   if (cp)
6814     {
6815       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6816       iv_ca_set_no_cp (data, ivs, group);
6817     }
6818
6819   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6820      first try important candidates not based on any memory object.  Only if
6821      this fails, try the specific ones.  Rationale -- in loops with many
6822      variables the best choice often is to use just one generic biv.  If we
6823      added here many ivs specific to the uses, the optimization algorithm later
6824      would be likely to get stuck in a local minimum, thus causing us to create
6825      too many ivs.  The approach from few ivs to more seems more likely to be
6826      successful -- starting from few ivs, replacing an expensive use by a
6827      specific iv should always be a win.  */
6828   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6829     {
6830       cand = data->vcands[i];
6831
6832       if (originalp && cand->pos !=IP_ORIGINAL)
6833         continue;
6834
6835       if (!originalp && cand->iv->base_object != NULL_TREE)
6836         continue;
6837
6838       if (iv_ca_cand_used_p (ivs, cand))
6839         continue;
6840
6841       cp = get_group_iv_cost (data, group, cand);
6842       if (!cp)
6843         continue;
6844
6845       iv_ca_set_cp (data, ivs, group, cp);
6846       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6847                                true);
6848       iv_ca_set_no_cp (data, ivs, group);
6849       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6850
6851       if (act_cost < best_cost)
6852         {
6853           best_cost = act_cost;
6854
6855           iv_ca_delta_free (&best_delta);
6856           best_delta = act_delta;
6857         }
6858       else
6859         iv_ca_delta_free (&act_delta);
6860     }
6861
6862   if (best_cost.infinite_cost_p ())
6863     {
6864       for (i = 0; i < group->n_map_members; i++)
6865         {
6866           cp = group->cost_map + i;
6867           cand = cp->cand;
6868           if (!cand)
6869             continue;
6870
6871           /* Already tried this.  */
6872           if (cand->important)
6873             {
6874               if (originalp && cand->pos == IP_ORIGINAL)
6875                 continue;
6876               if (!originalp && cand->iv->base_object == NULL_TREE)
6877                 continue;
6878             }
6879
6880           if (iv_ca_cand_used_p (ivs, cand))
6881             continue;
6882
6883           act_delta = NULL;
6884           iv_ca_set_cp (data, ivs, group, cp);
6885           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6886           iv_ca_set_no_cp (data, ivs, group);
6887           act_delta = iv_ca_delta_add (group,
6888                                        iv_ca_cand_for_group (ivs, group),
6889                                        cp, act_delta);
6890
6891           if (act_cost < best_cost)
6892             {
6893               best_cost = act_cost;
6894
6895               if (best_delta)
6896                 iv_ca_delta_free (&best_delta);
6897               best_delta = act_delta;
6898             }
6899           else
6900             iv_ca_delta_free (&act_delta);
6901         }
6902     }
6903
6904   iv_ca_delta_commit (data, ivs, best_delta, true);
6905   iv_ca_delta_free (&best_delta);
6906
6907   return !best_cost.infinite_cost_p ();
6908 }
6909
6910 /* Finds an initial assignment of candidates to uses.  */
6911
6912 static struct iv_ca *
6913 get_initial_solution (struct ivopts_data *data, bool originalp)
6914 {
6915   unsigned i;
6916   struct iv_ca *ivs = iv_ca_new (data);
6917
6918   for (i = 0; i < data->vgroups.length (); i++)
6919     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6920       {
6921         iv_ca_free (&ivs);
6922         return NULL;
6923       }
6924
6925   return ivs;
6926 }
6927
6928 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6929    points to a bool variable, this function tries to break local
6930    optimal fixed-point by replacing candidates in IVS if it's true.  */
6931
6932 static bool
6933 try_improve_iv_set (struct ivopts_data *data,
6934                     struct iv_ca *ivs, bool *try_replace_p)
6935 {
6936   unsigned i, n_ivs;
6937   comp_cost acost, best_cost = iv_ca_cost (ivs);
6938   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6939   struct iv_cand *cand;
6940
6941   /* Try extending the set of induction variables by one.  */
6942   for (i = 0; i < data->vcands.length (); i++)
6943     {
6944       cand = data->vcands[i];
6945
6946       if (iv_ca_cand_used_p (ivs, cand))
6947         continue;
6948
6949       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6950       if (!act_delta)
6951         continue;
6952
6953       /* If we successfully added the candidate and the set is small enough,
6954          try optimizing it by removing other candidates.  */
6955       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6956         {
6957           iv_ca_delta_commit (data, ivs, act_delta, true);
6958           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6959           iv_ca_delta_commit (data, ivs, act_delta, false);
6960           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6961         }
6962
6963       if (acost < best_cost)
6964         {
6965           best_cost = acost;
6966           iv_ca_delta_free (&best_delta);
6967           best_delta = act_delta;
6968         }
6969       else
6970         iv_ca_delta_free (&act_delta);
6971     }
6972
6973   if (!best_delta)
6974     {
6975       /* Try removing the candidates from the set instead.  */
6976       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6977
6978       if (!best_delta && *try_replace_p)
6979         {
6980           *try_replace_p = false;
6981           /* So far candidate selecting algorithm tends to choose fewer IVs
6982              so that it can handle cases in which loops have many variables
6983              but the best choice is often to use only one general biv.  One
6984              weakness is it can't handle opposite cases, in which different
6985              candidates should be chosen with respect to each use.  To solve
6986              the problem, we replace candidates in a manner described by the
6987              comments of iv_ca_replace, thus give general algorithm a chance
6988              to break local optimal fixed-point in these cases.  */
6989           best_cost = iv_ca_replace (data, ivs, &best_delta);
6990         }
6991
6992       if (!best_delta)
6993         return false;
6994     }
6995
6996   iv_ca_delta_commit (data, ivs, best_delta, true);
6997   gcc_assert (best_cost == iv_ca_cost (ivs));
6998   iv_ca_delta_free (&best_delta);
6999   return true;
7000 }
7001
7002 /* Attempts to find the optimal set of induction variables.  We do simple
7003    greedy heuristic -- we try to replace at most one candidate in the selected
7004    solution and remove the unused ivs while this improves the cost.  */
7005
7006 static struct iv_ca *
7007 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7008 {
7009   struct iv_ca *set;
7010   bool try_replace_p = true;
7011
7012   /* Get the initial solution.  */
7013   set = get_initial_solution (data, originalp);
7014   if (!set)
7015     {
7016       if (dump_file && (dump_flags & TDF_DETAILS))
7017         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7018       return NULL;
7019     }
7020
7021   if (dump_file && (dump_flags & TDF_DETAILS))
7022     {
7023       fprintf (dump_file, "Initial set of candidates:\n");
7024       iv_ca_dump (data, dump_file, set);
7025     }
7026
7027   while (try_improve_iv_set (data, set, &try_replace_p))
7028     {
7029       if (dump_file && (dump_flags & TDF_DETAILS))
7030         {
7031           fprintf (dump_file, "Improved to:\n");
7032           iv_ca_dump (data, dump_file, set);
7033         }
7034     }
7035
7036   return set;
7037 }
7038
7039 static struct iv_ca *
7040 find_optimal_iv_set (struct ivopts_data *data)
7041 {
7042   unsigned i;
7043   comp_cost cost, origcost;
7044   struct iv_ca *set, *origset;
7045
7046   /* Determine the cost based on a strategy that starts with original IVs,
7047      and try again using a strategy that prefers candidates not based
7048      on any IVs.  */
7049   origset = find_optimal_iv_set_1 (data, true);
7050   set = find_optimal_iv_set_1 (data, false);
7051
7052   if (!origset && !set)
7053     return NULL;
7054
7055   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7056   cost = set ? iv_ca_cost (set) : infinite_cost;
7057
7058   if (dump_file && (dump_flags & TDF_DETAILS))
7059     {
7060       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7061                origcost.cost, origcost.complexity);
7062       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7063                cost.cost, cost.complexity);
7064     }
7065
7066   /* Choose the one with the best cost.  */
7067   if (origcost <= cost)
7068     {
7069       if (set)
7070         iv_ca_free (&set);
7071       set = origset;
7072     }
7073   else if (origset)
7074     iv_ca_free (&origset);
7075
7076   for (i = 0; i < data->vgroups.length (); i++)
7077     {
7078       struct iv_group *group = data->vgroups[i];
7079       group->selected = iv_ca_cand_for_group (set, group)->cand;
7080     }
7081
7082   return set;
7083 }
7084
7085 /* Creates a new induction variable corresponding to CAND.  */
7086
7087 static void
7088 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7089 {
7090   gimple_stmt_iterator incr_pos;
7091   tree base;
7092   struct iv_use *use;
7093   struct iv_group *group;
7094   bool after = false;
7095
7096   if (!cand->iv)
7097     return;
7098
7099   switch (cand->pos)
7100     {
7101     case IP_NORMAL:
7102       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7103       break;
7104
7105     case IP_END:
7106       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7107       after = true;
7108       break;
7109
7110     case IP_AFTER_USE:
7111       after = true;
7112       /* fall through */
7113     case IP_BEFORE_USE:
7114       incr_pos = gsi_for_stmt (cand->incremented_at);
7115       break;
7116
7117     case IP_ORIGINAL:
7118       /* Mark that the iv is preserved.  */
7119       name_info (data, cand->var_before)->preserve_biv = true;
7120       name_info (data, cand->var_after)->preserve_biv = true;
7121
7122       /* Rewrite the increment so that it uses var_before directly.  */
7123       use = find_interesting_uses_op (data, cand->var_after);
7124       group = data->vgroups[use->group_id];
7125       group->selected = cand;
7126       return;
7127     }
7128
7129   gimple_add_tmp_var (cand->var_before);
7130
7131   base = unshare_expr (cand->iv->base);
7132
7133   create_iv (base, unshare_expr (cand->iv->step),
7134              cand->var_before, data->current_loop,
7135              &incr_pos, after, &cand->var_before, &cand->var_after);
7136 }
7137
7138 /* Creates new induction variables described in SET.  */
7139
7140 static void
7141 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7142 {
7143   unsigned i;
7144   struct iv_cand *cand;
7145   bitmap_iterator bi;
7146
7147   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7148     {
7149       cand = data->vcands[i];
7150       create_new_iv (data, cand);
7151     }
7152
7153   if (dump_file && (dump_flags & TDF_DETAILS))
7154     {
7155       fprintf (dump_file, "Selected IV set for loop %d",
7156                data->current_loop->num);
7157       if (data->loop_loc != UNKNOWN_LOCATION)
7158         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7159                  LOCATION_LINE (data->loop_loc));
7160       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7161                avg_loop_niter (data->current_loop));
7162       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7163                (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7164       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7165       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7166         {
7167           cand = data->vcands[i];
7168           dump_cand (dump_file, cand);
7169         }
7170       fprintf (dump_file, "\n");
7171     }
7172 }
7173
7174 /* Rewrites USE (definition of iv used in a nonlinear expression)
7175    using candidate CAND.  */
7176
7177 static void
7178 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7179                             struct iv_use *use, struct iv_cand *cand)
7180 {
7181   tree comp;
7182   tree op, tgt;
7183   gassign *ass;
7184   gimple_stmt_iterator bsi;
7185
7186   /* An important special case -- if we are asked to express value of
7187      the original iv by itself, just exit; there is no need to
7188      introduce a new computation (that might also need casting the
7189      variable to unsigned and back).  */
7190   if (cand->pos == IP_ORIGINAL
7191       && cand->incremented_at == use->stmt)
7192     {
7193       enum tree_code stmt_code;
7194
7195       gcc_assert (is_gimple_assign (use->stmt));
7196       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7197
7198       /* Check whether we may leave the computation unchanged.
7199          This is the case only if it does not rely on other
7200          computations in the loop -- otherwise, the computation
7201          we rely upon may be removed in remove_unused_ivs,
7202          thus leading to ICE.  */
7203       stmt_code = gimple_assign_rhs_code (use->stmt);
7204       if (stmt_code == PLUS_EXPR
7205           || stmt_code == MINUS_EXPR
7206           || stmt_code == POINTER_PLUS_EXPR)
7207         {
7208           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7209             op = gimple_assign_rhs2 (use->stmt);
7210           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7211             op = gimple_assign_rhs1 (use->stmt);
7212           else
7213             op = NULL_TREE;
7214         }
7215       else
7216         op = NULL_TREE;
7217
7218       if (op && expr_invariant_in_loop_p (data->current_loop, op))
7219         return;
7220     }
7221
7222   comp = get_computation (data->current_loop, use, cand);
7223   gcc_assert (comp != NULL_TREE);
7224
7225   switch (gimple_code (use->stmt))
7226     {
7227     case GIMPLE_PHI:
7228       tgt = PHI_RESULT (use->stmt);
7229
7230       /* If we should keep the biv, do not replace it.  */
7231       if (name_info (data, tgt)->preserve_biv)
7232         return;
7233
7234       bsi = gsi_after_labels (gimple_bb (use->stmt));
7235       break;
7236
7237     case GIMPLE_ASSIGN:
7238       tgt = gimple_assign_lhs (use->stmt);
7239       bsi = gsi_for_stmt (use->stmt);
7240       break;
7241
7242     default:
7243       gcc_unreachable ();
7244     }
7245
7246   if (!valid_gimple_rhs_p (comp)
7247       || (gimple_code (use->stmt) != GIMPLE_PHI
7248           /* We can't allow re-allocating the stmt as it might be pointed
7249              to still.  */
7250           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7251               >= gimple_num_ops (gsi_stmt (bsi)))))
7252     {
7253       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7254                                        true, GSI_SAME_STMT);
7255       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7256         {
7257           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7258           /* As this isn't a plain copy we have to reset alignment
7259              information.  */
7260           if (SSA_NAME_PTR_INFO (comp))
7261             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7262         }
7263     }
7264
7265   if (gimple_code (use->stmt) == GIMPLE_PHI)
7266     {
7267       ass = gimple_build_assign (tgt, comp);
7268       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7269
7270       bsi = gsi_for_stmt (use->stmt);
7271       remove_phi_node (&bsi, false);
7272     }
7273   else
7274     {
7275       gimple_assign_set_rhs_from_tree (&bsi, comp);
7276       use->stmt = gsi_stmt (bsi);
7277     }
7278 }
7279
7280 /* Performs a peephole optimization to reorder the iv update statement with
7281    a mem ref to enable instruction combining in later phases. The mem ref uses
7282    the iv value before the update, so the reordering transformation requires
7283    adjustment of the offset. CAND is the selected IV_CAND.
7284
7285    Example:
7286
7287    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7288    iv2 = iv1 + 1;
7289
7290    if (t < val)      (1)
7291      goto L;
7292    goto Head;
7293
7294
7295    directly propagating t over to (1) will introduce overlapping live range
7296    thus increase register pressure. This peephole transform it into:
7297
7298
7299    iv2 = iv1 + 1;
7300    t = MEM_REF (base, iv2, 8, 8);
7301    if (t < val)
7302      goto L;
7303    goto Head;
7304 */
7305
7306 static void
7307 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7308 {
7309   tree var_after;
7310   gimple *iv_update, *stmt;
7311   basic_block bb;
7312   gimple_stmt_iterator gsi, gsi_iv;
7313
7314   if (cand->pos != IP_NORMAL)
7315     return;
7316
7317   var_after = cand->var_after;
7318   iv_update = SSA_NAME_DEF_STMT (var_after);
7319
7320   bb = gimple_bb (iv_update);
7321   gsi = gsi_last_nondebug_bb (bb);
7322   stmt = gsi_stmt (gsi);
7323
7324   /* Only handle conditional statement for now.  */
7325   if (gimple_code (stmt) != GIMPLE_COND)
7326     return;
7327
7328   gsi_prev_nondebug (&gsi);
7329   stmt = gsi_stmt (gsi);
7330   if (stmt != iv_update)
7331     return;
7332
7333   gsi_prev_nondebug (&gsi);
7334   if (gsi_end_p (gsi))
7335     return;
7336
7337   stmt = gsi_stmt (gsi);
7338   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7339     return;
7340
7341   if (stmt != use->stmt)
7342     return;
7343
7344   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7345     return;
7346
7347   if (dump_file && (dump_flags & TDF_DETAILS))
7348     {
7349       fprintf (dump_file, "Reordering \n");
7350       print_gimple_stmt (dump_file, iv_update, 0, 0);
7351       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7352       fprintf (dump_file, "\n");
7353     }
7354
7355   gsi = gsi_for_stmt (use->stmt);
7356   gsi_iv = gsi_for_stmt (iv_update);
7357   gsi_move_before (&gsi_iv, &gsi);
7358
7359   cand->pos = IP_BEFORE_USE;
7360   cand->incremented_at = use->stmt;
7361 }
7362
7363 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7364
7365 static void
7366 rewrite_use_address (struct ivopts_data *data,
7367                      struct iv_use *use, struct iv_cand *cand)
7368 {
7369   aff_tree aff;
7370   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7371   tree base_hint = NULL_TREE;
7372   tree ref, iv;
7373   bool ok;
7374
7375   adjust_iv_update_pos (cand, use);
7376   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7377   gcc_assert (ok);
7378   unshare_aff_combination (&aff);
7379
7380   /* To avoid undefined overflow problems, all IV candidates use unsigned
7381      integer types.  The drawback is that this makes it impossible for
7382      create_mem_ref to distinguish an IV that is based on a memory object
7383      from one that represents simply an offset.
7384
7385      To work around this problem, we pass a hint to create_mem_ref that
7386      indicates which variable (if any) in aff is an IV based on a memory
7387      object.  Note that we only consider the candidate.  If this is not
7388      based on an object, the base of the reference is in some subexpression
7389      of the use -- but these will use pointer types, so they are recognized
7390      by the create_mem_ref heuristics anyway.  */
7391   if (cand->iv->base_object)
7392     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7393
7394   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7395   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7396                         reference_alias_ptr_type (*use->op_p),
7397                         iv, base_hint, data->speed);
7398   copy_ref_info (ref, *use->op_p);
7399   *use->op_p = ref;
7400 }
7401
7402 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7403    candidate CAND.  */
7404
7405 static void
7406 rewrite_use_compare (struct ivopts_data *data,
7407                      struct iv_use *use, struct iv_cand *cand)
7408 {
7409   tree comp, *var_p, op, bound;
7410   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7411   enum tree_code compare;
7412   struct iv_group *group = data->vgroups[use->group_id];
7413   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7414   bool ok;
7415
7416   bound = cp->value;
7417   if (bound)
7418     {
7419       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7420       tree var_type = TREE_TYPE (var);
7421       gimple_seq stmts;
7422
7423       if (dump_file && (dump_flags & TDF_DETAILS))
7424         {
7425           fprintf (dump_file, "Replacing exit test: ");
7426           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7427         }
7428       compare = cp->comp;
7429       bound = unshare_expr (fold_convert (var_type, bound));
7430       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7431       if (stmts)
7432         gsi_insert_seq_on_edge_immediate (
7433                 loop_preheader_edge (data->current_loop),
7434                 stmts);
7435
7436       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7437       gimple_cond_set_lhs (cond_stmt, var);
7438       gimple_cond_set_code (cond_stmt, compare);
7439       gimple_cond_set_rhs (cond_stmt, op);
7440       return;
7441     }
7442
7443   /* The induction variable elimination failed; just express the original
7444      giv.  */
7445   comp = get_computation (data->current_loop, use, cand);
7446   gcc_assert (comp != NULL_TREE);
7447
7448   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7449   gcc_assert (ok);
7450
7451   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7452                                      true, GSI_SAME_STMT);
7453 }
7454
7455 /* Rewrite the groups using the selected induction variables.  */
7456
7457 static void
7458 rewrite_groups (struct ivopts_data *data)
7459 {
7460   unsigned i, j;
7461
7462   for (i = 0; i < data->vgroups.length (); i++)
7463     {
7464       struct iv_group *group = data->vgroups[i];
7465       struct iv_cand *cand = group->selected;
7466
7467       gcc_assert (cand);
7468
7469       if (group->type == USE_NONLINEAR_EXPR)
7470         {
7471           for (j = 0; j < group->vuses.length (); j++)
7472             {
7473               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7474               update_stmt (group->vuses[j]->stmt);
7475             }
7476         }
7477       else if (group->type == USE_ADDRESS)
7478         {
7479           for (j = 0; j < group->vuses.length (); j++)
7480             {
7481               rewrite_use_address (data, group->vuses[j], cand);
7482               update_stmt (group->vuses[j]->stmt);
7483             }
7484         }
7485       else
7486         {
7487           gcc_assert (group->type == USE_COMPARE);
7488
7489           for (j = 0; j < group->vuses.length (); j++)
7490             {
7491               rewrite_use_compare (data, group->vuses[j], cand);
7492               update_stmt (group->vuses[j]->stmt);
7493             }
7494         }
7495     }
7496 }
7497
7498 /* Removes the ivs that are not used after rewriting.  */
7499
7500 static void
7501 remove_unused_ivs (struct ivopts_data *data)
7502 {
7503   unsigned j;
7504   bitmap_iterator bi;
7505   bitmap toremove = BITMAP_ALLOC (NULL);
7506
7507   /* Figure out an order in which to release SSA DEFs so that we don't
7508      release something that we'd have to propagate into a debug stmt
7509      afterwards.  */
7510   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7511     {
7512       struct version_info *info;
7513
7514       info = ver_info (data, j);
7515       if (info->iv
7516           && !integer_zerop (info->iv->step)
7517           && !info->inv_id
7518           && !info->iv->nonlin_use
7519           && !info->preserve_biv)
7520         {
7521           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7522
7523           tree def = info->iv->ssa_name;
7524
7525           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7526             {
7527               imm_use_iterator imm_iter;
7528               use_operand_p use_p;
7529               gimple *stmt;
7530               int count = 0;
7531
7532               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7533                 {
7534                   if (!gimple_debug_bind_p (stmt))
7535                     continue;
7536
7537                   /* We just want to determine whether to do nothing
7538                      (count == 0), to substitute the computed
7539                      expression into a single use of the SSA DEF by
7540                      itself (count == 1), or to use a debug temp
7541                      because the SSA DEF is used multiple times or as
7542                      part of a larger expression (count > 1). */
7543                   count++;
7544                   if (gimple_debug_bind_get_value (stmt) != def)
7545                     count++;
7546
7547                   if (count > 1)
7548                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7549                 }
7550
7551               if (!count)
7552                 continue;
7553
7554               struct iv_use dummy_use;
7555               struct iv_cand *best_cand = NULL, *cand;
7556               unsigned i, best_pref = 0, cand_pref;
7557
7558               memset (&dummy_use, 0, sizeof (dummy_use));
7559               dummy_use.iv = info->iv;
7560               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7561                 {
7562                   cand = data->vgroups[i]->selected;
7563                   if (cand == best_cand)
7564                     continue;
7565                   cand_pref = operand_equal_p (cand->iv->step,
7566                                                info->iv->step, 0)
7567                     ? 4 : 0;
7568                   cand_pref
7569                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7570                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7571                     ? 2 : 0;
7572                   cand_pref
7573                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7574                     ? 1 : 0;
7575                   if (best_cand == NULL || best_pref < cand_pref)
7576                     {
7577                       best_cand = cand;
7578                       best_pref = cand_pref;
7579                     }
7580                 }
7581
7582               if (!best_cand)
7583                 continue;
7584
7585               tree comp = get_computation_at (data->current_loop,
7586                                               &dummy_use, best_cand,
7587                                               SSA_NAME_DEF_STMT (def));
7588               if (!comp)
7589                 continue;
7590
7591               if (count > 1)
7592                 {
7593                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7594                   DECL_ARTIFICIAL (vexpr) = 1;
7595                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7596                   if (SSA_NAME_VAR (def))
7597                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7598                   else
7599                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7600                   gdebug *def_temp
7601                     = gimple_build_debug_bind (vexpr, comp, NULL);
7602                   gimple_stmt_iterator gsi;
7603
7604                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7605                     gsi = gsi_after_labels (gimple_bb
7606                                             (SSA_NAME_DEF_STMT (def)));
7607                   else
7608                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7609
7610                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7611                   comp = vexpr;
7612                 }
7613
7614               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7615                 {
7616                   if (!gimple_debug_bind_p (stmt))
7617                     continue;
7618
7619                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7620                     SET_USE (use_p, comp);
7621
7622                   update_stmt (stmt);
7623                 }
7624             }
7625         }
7626     }
7627
7628   release_defs_bitset (toremove);
7629
7630   BITMAP_FREE (toremove);
7631 }
7632
7633 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7634    for hash_map::traverse.  */
7635
7636 bool
7637 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7638 {
7639   free (value);
7640   return true;
7641 }
7642
7643 /* Frees data allocated by the optimization of a single loop.  */
7644
7645 static void
7646 free_loop_data (struct ivopts_data *data)
7647 {
7648   unsigned i, j;
7649   bitmap_iterator bi;
7650   tree obj;
7651
7652   if (data->niters)
7653     {
7654       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7655       delete data->niters;
7656       data->niters = NULL;
7657     }
7658
7659   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7660     {
7661       struct version_info *info;
7662
7663       info = ver_info (data, i);
7664       info->iv = NULL;
7665       info->has_nonlin_use = false;
7666       info->preserve_biv = false;
7667       info->inv_id = 0;
7668     }
7669   bitmap_clear (data->relevant);
7670   bitmap_clear (data->important_candidates);
7671
7672   for (i = 0; i < data->vgroups.length (); i++)
7673     {
7674       struct iv_group *group = data->vgroups[i];
7675
7676       for (j = 0; j < group->vuses.length (); j++)
7677         free (group->vuses[j]);
7678       group->vuses.release ();
7679
7680       BITMAP_FREE (group->related_cands);
7681       for (j = 0; j < group->n_map_members; j++)
7682         if (group->cost_map[j].depends_on)
7683           BITMAP_FREE (group->cost_map[j].depends_on);
7684
7685       free (group->cost_map);
7686       free (group);
7687     }
7688   data->vgroups.truncate (0);
7689
7690   for (i = 0; i < data->vcands.length (); i++)
7691     {
7692       struct iv_cand *cand = data->vcands[i];
7693
7694       if (cand->depends_on)
7695         BITMAP_FREE (cand->depends_on);
7696       free (cand);
7697     }
7698   data->vcands.truncate (0);
7699
7700   if (data->version_info_size < num_ssa_names)
7701     {
7702       data->version_info_size = 2 * num_ssa_names;
7703       free (data->version_info);
7704       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7705     }
7706
7707   data->max_inv_id = 0;
7708
7709   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7710     SET_DECL_RTL (obj, NULL_RTX);
7711
7712   decl_rtl_to_reset.truncate (0);
7713
7714   data->inv_expr_tab->empty ();
7715   data->max_inv_expr_id = 0;
7716
7717   data->iv_common_cand_tab->empty ();
7718   data->iv_common_cands.truncate (0);
7719 }
7720
7721 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7722    loop tree.  */
7723
7724 static void
7725 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7726 {
7727   free_loop_data (data);
7728   free (data->version_info);
7729   BITMAP_FREE (data->relevant);
7730   BITMAP_FREE (data->important_candidates);
7731
7732   decl_rtl_to_reset.release ();
7733   data->vgroups.release ();
7734   data->vcands.release ();
7735   delete data->inv_expr_tab;
7736   data->inv_expr_tab = NULL;
7737   free_affine_expand_cache (&data->name_expansion_cache);
7738   delete data->iv_common_cand_tab;
7739   data->iv_common_cand_tab = NULL;
7740   data->iv_common_cands.release ();
7741   obstack_free (&data->iv_obstack, NULL);
7742 }
7743
7744 /* Returns true if the loop body BODY includes any function calls.  */
7745
7746 static bool
7747 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7748 {
7749   gimple_stmt_iterator gsi;
7750   unsigned i;
7751
7752   for (i = 0; i < num_nodes; i++)
7753     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7754       {
7755         gimple *stmt = gsi_stmt (gsi);
7756         if (is_gimple_call (stmt)
7757             && !gimple_call_internal_p (stmt)
7758             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7759           return true;
7760       }
7761   return false;
7762 }
7763
7764 /* Optimizes the LOOP.  Returns true if anything changed.  */
7765
7766 static bool
7767 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7768 {
7769   bool changed = false;
7770   struct iv_ca *iv_ca;
7771   edge exit = single_dom_exit (loop);
7772   basic_block *body;
7773
7774   gcc_assert (!data->niters);
7775   data->current_loop = loop;
7776   data->loop_loc = find_loop_location (loop);
7777   data->speed = optimize_loop_for_speed_p (loop);
7778
7779   if (dump_file && (dump_flags & TDF_DETAILS))
7780     {
7781       fprintf (dump_file, "Processing loop %d", loop->num);
7782       if (data->loop_loc != UNKNOWN_LOCATION)
7783         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7784                  LOCATION_LINE (data->loop_loc));
7785       fprintf (dump_file, "\n");
7786
7787       if (exit)
7788         {
7789           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7790                    exit->src->index, exit->dest->index);
7791           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7792           fprintf (dump_file, "\n");
7793         }
7794
7795       fprintf (dump_file, "\n");
7796     }
7797
7798   body = get_loop_body (loop);
7799   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7800   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7801   free (body);
7802
7803   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7804
7805   /* For each ssa name determines whether it behaves as an induction variable
7806      in some loop.  */
7807   if (!find_induction_variables (data))
7808     goto finish;
7809
7810   /* Finds interesting uses (item 1).  */
7811   find_interesting_uses (data);
7812   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7813     goto finish;
7814
7815   /* Finds candidates for the induction variables (item 2).  */
7816   find_iv_candidates (data);
7817
7818   /* Calculates the costs (item 3, part 1).  */
7819   determine_iv_costs (data);
7820   determine_group_iv_costs (data);
7821   determine_set_costs (data);
7822
7823   /* Find the optimal set of induction variables (item 3, part 2).  */
7824   iv_ca = find_optimal_iv_set (data);
7825   if (!iv_ca)
7826     goto finish;
7827   changed = true;
7828
7829   /* Create the new induction variables (item 4, part 1).  */
7830   create_new_ivs (data, iv_ca);
7831   iv_ca_free (&iv_ca);
7832
7833   /* Rewrite the uses (item 4, part 2).  */
7834   rewrite_groups (data);
7835
7836   /* Remove the ivs that are unused after rewriting.  */
7837   remove_unused_ivs (data);
7838
7839   /* We have changed the structure of induction variables; it might happen
7840      that definitions in the scev database refer to some of them that were
7841      eliminated.  */
7842   scev_reset ();
7843
7844 finish:
7845   free_loop_data (data);
7846
7847   return changed;
7848 }
7849
7850 /* Main entry point.  Optimizes induction variables in loops.  */
7851
7852 void
7853 tree_ssa_iv_optimize (void)
7854 {
7855   struct loop *loop;
7856   struct ivopts_data data;
7857
7858   tree_ssa_iv_optimize_init (&data);
7859
7860   /* Optimize the loops starting with the innermost ones.  */
7861   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7862     {
7863       if (dump_file && (dump_flags & TDF_DETAILS))
7864         flow_loop_dump (loop, dump_file, NULL, 1);
7865
7866       tree_ssa_iv_optimize_loop (&data, loop);
7867     }
7868
7869   tree_ssa_iv_optimize_finalize (&data);
7870 }