gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "tm_p.h"
  79 #include "ssa.h"
  80 #include "expmed.h"
  81 #include "insn-config.h"
  82 #include "emit-rtl.h"
  83 #include "recog.h"
  84 #include "cgraph.h"
  85 #include "gimple-pretty-print.h"
  86 #include "alias.h"
  87 #include "fold-const.h"
  88 #include "stor-layout.h"
  89 #include "tree-eh.h"
  90 #include "gimplify.h"
  91 #include "gimple-iterator.h"
  92 #include "gimplify-me.h"
  93 #include "tree-cfg.h"
  94 #include "tree-ssa-loop-ivopts.h"
  95 #include "tree-ssa-loop-manip.h"
  96 #include "tree-ssa-loop-niter.h"
  97 #include "tree-ssa-loop.h"
  98 #include "explow.h"
  99 #include "expr.h"
 100 #include "tree-dfa.h"
 101 #include "tree-ssa.h"
 102 #include "cfgloop.h"
 103 #include "tree-scalar-evolution.h"
 104 #include "params.h"
 105 #include "tree-affine.h"
 106 #include "tree-ssa-propagate.h"
 107 #include "tree-ssa-address.h"
 108 #include "builtins.h"
 109 #include "tree-vectorizer.h"
 110
 111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 112    cost of different addressing modes.  This should be moved to a TBD
 113    interface between the GIMPLE and RTL worlds.  */
 114
 115 /* The infinite cost.  */
 116 #define INFTY 10000000
 117
 118 #define AVG_LOOP_NITER(LOOP) 5
 119
 120 /* Returns the expected number of loop iterations for LOOP.
 121    The average trip count is computed from profile data if it
 122    exists. */
 123
 124 static inline HOST_WIDE_INT
 125 avg_loop_niter (struct loop *loop)
 126 {
 127   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 128   if (niter == -1)
 129     {
 130       niter = likely_max_stmt_executions_int (loop);
 131       if (niter == -1 || niter > AVG_LOOP_NITER (loop))
 132         return AVG_LOOP_NITER (loop);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   comp_cost (): cost (0), complexity (0), scratch (0)
 177   {}
 178
 179   comp_cost (int cost, unsigned complexity, int scratch = 0)
 180     : cost (cost), complexity (complexity), scratch (scratch)
 181   {}
 182
 183   /* Returns true if COST is infinite.  */
 184   bool infinite_cost_p ();
 185
 186   /* Adds costs COST1 and COST2.  */
 187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 188
 189   /* Adds COST to the comp_cost.  */
 190   comp_cost operator+= (comp_cost cost);
 191
 192   /* Adds constant C to this comp_cost.  */
 193   comp_cost operator+= (HOST_WIDE_INT c);
 194
 195   /* Subtracts constant C to this comp_cost.  */
 196   comp_cost operator-= (HOST_WIDE_INT c);
 197
 198   /* Divide the comp_cost by constant C.  */
 199   comp_cost operator/= (HOST_WIDE_INT c);
 200
 201   /* Multiply the comp_cost by constant C.  */
 202   comp_cost operator*= (HOST_WIDE_INT c);
 203
 204   /* Subtracts costs COST1 and COST2.  */
 205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 206
 207   /* Subtracts COST from this comp_cost.  */
 208   comp_cost operator-= (comp_cost cost);
 209
 210   /* Returns true if COST1 is smaller than COST2.  */
 211   friend bool operator< (comp_cost cost1, comp_cost cost2);
 212
 213   /* Returns true if COST1 and COST2 are equal.  */
 214   friend bool operator== (comp_cost cost1, comp_cost cost2);
 215
 216   /* Returns true if COST1 is smaller or equal than COST2.  */
 217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 218
 219   int cost;             /* The runtime cost.  */
 220   unsigned complexity;  /* The estimate of the complexity of the code for
 221                            the computation (in no concrete units --
 222                            complexity field should be larger for more
 223                            complex expressions and addressing modes).  */
 224   int scratch;          /* Scratch used during cost computation.  */
 225 };
 226
 227 static const comp_cost no_cost;
 228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
 342 struct iv_inv_expr_ent;
 343
 344 /* The candidate - cost pair.  */
 345 struct cost_pair
 346 {
 347   struct iv_cand *cand; /* The candidate.  */
 348   comp_cost cost;       /* The cost.  */
 349   bitmap depends_on;    /* The list of invariants that have to be
 350                            preserved.  */
 351   tree value;           /* For final value elimination, the expression for
 352                            the final value of the iv.  For iv elimination,
 353                            the new bound to compare with.  */
 354   enum tree_code comp;  /* For iv elimination, the comparison.  */
 355   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
 356 };
 357
 358 /* Use.  */
 359 struct iv_use
 360 {
 361   unsigned id;          /* The id of the use.  */
 362   unsigned group_id;    /* The group id the use belongs to.  */
 363   enum use_type type;   /* Type of the use.  */
 364   struct iv *iv;        /* The induction variable it is based on.  */
 365   gimple *stmt;         /* Statement in that it occurs.  */
 366   tree *op_p;           /* The place where it occurs.  */
 367
 368   tree addr_base;       /* Base address with const offset stripped.  */
 369   unsigned HOST_WIDE_INT addr_offset;
 370                         /* Const offset stripped from base address.  */
 371 };
 372
 373 /* Group of uses.  */
 374 struct iv_group
 375 {
 376   /* The id of the group.  */
 377   unsigned id;
 378   /* Uses of the group are of the same type.  */
 379   enum use_type type;
 380   /* The set of "related" IV candidates, plus the important ones.  */
 381   bitmap related_cands;
 382   /* Number of IV candidates in the cost_map.  */
 383   unsigned n_map_members;
 384   /* The costs wrto the iv candidates.  */
 385   struct cost_pair *cost_map;
 386   /* The selected candidate for the group.  */
 387   struct iv_cand *selected;
 388   /* Uses in the group.  */
 389   vec<struct iv_use *> vuses;
 390 };
 391
 392 /* The position where the iv is computed.  */
 393 enum iv_position
 394 {
 395   IP_NORMAL,            /* At the end, just before the exit condition.  */
 396   IP_END,               /* At the end of the latch block.  */
 397   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 398   IP_AFTER_USE,         /* Immediately after a specific use.  */
 399   IP_ORIGINAL           /* The original biv.  */
 400 };
 401
 402 /* The induction variable candidate.  */
 403 struct iv_cand
 404 {
 405   unsigned id;          /* The number of the candidate.  */
 406   bool important;       /* Whether this is an "important" candidate, i.e. such
 407                            that it should be considered by all uses.  */
 408   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 409   gimple *incremented_at;/* For original biv, the statement where it is
 410                            incremented.  */
 411   tree var_before;      /* The variable used for it before increment.  */
 412   tree var_after;       /* The variable used for it after increment.  */
 413   struct iv *iv;        /* The value of the candidate.  NULL for
 414                            "pseudocandidate" used to indicate the possibility
 415                            to replace the final value of an iv by direct
 416                            computation of the value.  */
 417   unsigned cost;        /* Cost of the candidate.  */
 418   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 419   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 420                               where it is incremented.  */
 421   bitmap depends_on;    /* The list of invariants that are used in step of the
 422                            biv.  */
 423   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 424                            smaller type.  */
 425 };
 426
 427 /* Hashtable entry for common candidate derived from iv uses.  */
 428 struct iv_common_cand
 429 {
 430   tree base;
 431   tree step;
 432   /* IV uses from which this common candidate is derived.  */
 433   auto_vec<struct iv_use *> uses;
 434   hashval_t hash;
 435 };
 436
 437 /* Hashtable helpers.  */
 438
 439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 440 {
 441   static inline hashval_t hash (const iv_common_cand *);
 442   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 443 };
 444
 445 /* Hash function for possible common candidates.  */
 446
 447 inline hashval_t
 448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 449 {
 450   return ccand->hash;
 451 }
 452
 453 /* Hash table equality function for common candidates.  */
 454
 455 inline bool
 456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 457                               const iv_common_cand *ccand2)
 458 {
 459   return (ccand1->hash == ccand2->hash
 460           && operand_equal_p (ccand1->base, ccand2->base, 0)
 461           && operand_equal_p (ccand1->step, ccand2->step, 0)
 462           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 463               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 464 }
 465
 466 /* Loop invariant expression hashtable entry.  */
 467
 468 struct iv_inv_expr_ent
 469 {
 470   /* Tree expression of the entry.  */
 471   tree expr;
 472   /* Unique indentifier.  */
 473   int id;
 474   /* Hash value.  */
 475   hashval_t hash;
 476 };
 477
 478 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 479
 480 static int
 481 sort_iv_inv_expr_ent (const void *a, const void *b)
 482 {
 483   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 484   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 485
 486   unsigned id1 = (*e1)->id;
 487   unsigned id2 = (*e2)->id;
 488
 489   if (id1 < id2)
 490     return -1;
 491   else if (id1 > id2)
 492     return 1;
 493   else
 494     return 0;
 495 }
 496
 497 /* Hashtable helpers.  */
 498
 499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 500 {
 501   static inline hashval_t hash (const iv_inv_expr_ent *);
 502   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 503 };
 504
 505 /* Hash function for loop invariant expressions.  */
 506
 507 inline hashval_t
 508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 509 {
 510   return expr->hash;
 511 }
 512
 513 /* Hash table equality function for expressions.  */
 514
 515 inline bool
 516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 517                            const iv_inv_expr_ent *expr2)
 518 {
 519   return expr1->hash == expr2->hash
 520          && operand_equal_p (expr1->expr, expr2->expr, 0);
 521 }
 522
 523 struct ivopts_data
 524 {
 525   /* The currently optimized loop.  */
 526   struct loop *current_loop;
 527   source_location loop_loc;
 528
 529   /* Numbers of iterations for all exits of the current loop.  */
 530   hash_map<edge, tree_niter_desc *> *niters;
 531
 532   /* Number of registers used in it.  */
 533   unsigned regs_used;
 534
 535   /* The size of version_info array allocated.  */
 536   unsigned version_info_size;
 537
 538   /* The array of information for the ssa names.  */
 539   struct version_info *version_info;
 540
 541   /* The hashtable of loop invariant expressions created
 542      by ivopt.  */
 543   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 544
 545   /* Loop invariant expression id.  */
 546   int max_inv_expr_id;
 547
 548   /* The bitmap of indices in version_info whose value was changed.  */
 549   bitmap relevant;
 550
 551   /* The uses of induction variables.  */
 552   vec<iv_group *> vgroups;
 553
 554   /* The candidates.  */
 555   vec<iv_cand *> vcands;
 556
 557   /* A bitmap of important candidates.  */
 558   bitmap important_candidates;
 559
 560   /* Cache used by tree_to_aff_combination_expand.  */
 561   hash_map<tree, name_expansion *> *name_expansion_cache;
 562
 563   /* The hashtable of common candidates derived from iv uses.  */
 564   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 565
 566   /* The common candidates.  */
 567   vec<iv_common_cand *> iv_common_cands;
 568
 569   /* The maximum invariant id.  */
 570   unsigned max_inv_id;
 571
 572   /* Number of no_overflow BIVs which are not used in memory address.  */
 573   unsigned bivs_not_used_in_addr;
 574
 575   /* Obstack for iv structure.  */
 576   struct obstack iv_obstack;
 577
 578   /* Whether to consider just related and important candidates when replacing a
 579      use.  */
 580   bool consider_all_candidates;
 581
 582   /* Are we optimizing for speed?  */
 583   bool speed;
 584
 585   /* Whether the loop body includes any function calls.  */
 586   bool body_includes_call;
 587
 588   /* Whether the loop body can only be exited via single exit.  */
 589   bool loop_single_exit_p;
 590 };
 591
 592 /* An assignment of iv candidates to uses.  */
 593
 594 struct iv_ca
 595 {
 596   /* The number of uses covered by the assignment.  */
 597   unsigned upto;
 598
 599   /* Number of uses that cannot be expressed by the candidates in the set.  */
 600   unsigned bad_groups;
 601
 602   /* Candidate assigned to a use, together with the related costs.  */
 603   struct cost_pair **cand_for_group;
 604
 605   /* Number of times each candidate is used.  */
 606   unsigned *n_cand_uses;
 607
 608   /* The candidates used.  */
 609   bitmap cands;
 610
 611   /* The number of candidates in the set.  */
 612   unsigned n_cands;
 613
 614   /* Total number of registers needed.  */
 615   unsigned n_regs;
 616
 617   /* Total cost of expressing uses.  */
 618   comp_cost cand_use_cost;
 619
 620   /* Total cost of candidates.  */
 621   unsigned cand_cost;
 622
 623   /* Number of times each invariant is used.  */
 624   unsigned *n_invariant_uses;
 625
 626   /* Hash set with used invariant expression.  */
 627   hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
 628
 629   /* Total cost of the assignment.  */
 630   comp_cost cost;
 631 };
 632
 633 /* Difference of two iv candidate assignments.  */
 634
 635 struct iv_ca_delta
 636 {
 637   /* Changed group.  */
 638   struct iv_group *group;
 639
 640   /* An old assignment (for rollback purposes).  */
 641   struct cost_pair *old_cp;
 642
 643   /* A new assignment.  */
 644   struct cost_pair *new_cp;
 645
 646   /* Next change in the list.  */
 647   struct iv_ca_delta *next;
 648 };
 649
 650 /* Bound on number of candidates below that all candidates are considered.  */
 651
 652 #define CONSIDER_ALL_CANDIDATES_BOUND \
 653   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 654
 655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 656    optimizing such a loop would help, and it would take ages).  */
 657
 658 #define MAX_CONSIDERED_GROUPS \
 659   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 660
 661 /* If there are at most this number of ivs in the set, try removing unnecessary
 662    ivs from the set always.  */
 663
 664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 665   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 666
 667 /* The list of trees for that the decl_rtl field must be reset is stored
 668    here.  */
 669
 670 static vec<tree> decl_rtl_to_reset;
 671
 672 static comp_cost force_expr_to_var_cost (tree, bool);
 673
 674 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 675
 676 edge
 677 single_dom_exit (struct loop *loop)
 678 {
 679   edge exit = single_exit (loop);
 680
 681   if (!exit)
 682     return NULL;
 683
 684   if (!just_once_each_iteration_p (loop, exit->src))
 685     return NULL;
 686
 687   return exit;
 688 }
 689
 690 /* Dumps information about the induction variable IV to FILE.  Don't dump
 691    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 692    preceding spaces indicated by INDENT_LEVEL.  */
 693
 694 void
 695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 696 {
 697   const char *p;
 698   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 699
 700   if (indent_level > 4)
 701     indent_level = 4;
 702   p = spaces + 8 - (indent_level << 1);
 703
 704   fprintf (file, "%sIV struct:\n", p);
 705   if (iv->ssa_name && dump_name)
 706     {
 707       fprintf (file, "%s  SSA_NAME:\t", p);
 708       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 709       fprintf (file, "\n");
 710     }
 711
 712   fprintf (file, "%s  Type:\t", p);
 713   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 714   fprintf (file, "\n");
 715
 716   fprintf (file, "%s  Base:\t", p);
 717   print_generic_expr (file, iv->base, TDF_SLIM);
 718   fprintf (file, "\n");
 719
 720   fprintf (file, "%s  Step:\t", p);
 721   print_generic_expr (file, iv->step, TDF_SLIM);
 722   fprintf (file, "\n");
 723
 724   if (iv->base_object)
 725     {
 726       fprintf (file, "%s  Object:\t", p);
 727       print_generic_expr (file, iv->base_object, TDF_SLIM);
 728       fprintf (file, "\n");
 729     }
 730
 731   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 732
 733   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 734            p, iv->no_overflow ? "No-overflow" : "Overflow");
 735 }
 736
 737 /* Dumps information about the USE to FILE.  */
 738
 739 void
 740 dump_use (FILE *file, struct iv_use *use)
 741 {
 742   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 743   fprintf (file, "    At stmt:\t");
 744   print_gimple_stmt (file, use->stmt, 0, 0);
 745   fprintf (file, "    At pos:\t");
 746   if (use->op_p)
 747     print_generic_expr (file, *use->op_p, TDF_SLIM);
 748   fprintf (file, "\n");
 749   dump_iv (file, use->iv, false, 2);
 750 }
 751
 752 /* Dumps information about the uses to FILE.  */
 753
 754 void
 755 dump_groups (FILE *file, struct ivopts_data *data)
 756 {
 757   unsigned i, j;
 758   struct iv_group *group;
 759
 760   for (i = 0; i < data->vgroups.length (); i++)
 761     {
 762       group = data->vgroups[i];
 763       fprintf (file, "Group %d:\n", group->id);
 764       if (group->type == USE_NONLINEAR_EXPR)
 765         fprintf (file, "  Type:\tGENERIC\n");
 766       else if (group->type == USE_ADDRESS)
 767         fprintf (file, "  Type:\tADDRESS\n");
 768       else
 769         {
 770           gcc_assert (group->type == USE_COMPARE);
 771           fprintf (file, "  Type:\tCOMPARE\n");
 772         }
 773       for (j = 0; j < group->vuses.length (); j++)
 774         dump_use (file, group->vuses[j]);
 775     }
 776 }
 777
 778 /* Dumps information about induction variable candidate CAND to FILE.  */
 779
 780 void
 781 dump_cand (FILE *file, struct iv_cand *cand)
 782 {
 783   struct iv *iv = cand->iv;
 784
 785   fprintf (file, "Candidate %d:\n", cand->id);
 786   if (cand->depends_on)
 787     {
 788       fprintf (file, "  Depend on: ");
 789       dump_bitmap (file, cand->depends_on);
 790     }
 791
 792   if (cand->var_before)
 793     {
 794       fprintf (file, "  Var befor: ");
 795       print_generic_expr (file, cand->var_before, TDF_SLIM);
 796       fprintf (file, "\n");
 797     }
 798   if (cand->var_after)
 799     {
 800       fprintf (file, "  Var after: ");
 801       print_generic_expr (file, cand->var_after, TDF_SLIM);
 802       fprintf (file, "\n");
 803     }
 804
 805   switch (cand->pos)
 806     {
 807     case IP_NORMAL:
 808       fprintf (file, "  Incr POS: before exit test\n");
 809       break;
 810
 811     case IP_BEFORE_USE:
 812       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 813       break;
 814
 815     case IP_AFTER_USE:
 816       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 817       break;
 818
 819     case IP_END:
 820       fprintf (file, "  Incr POS: at end\n");
 821       break;
 822
 823     case IP_ORIGINAL:
 824       fprintf (file, "  Incr POS: orig biv\n");
 825       break;
 826     }
 827
 828   dump_iv (file, iv, false, 1);
 829 }
 830
 831 /* Returns the info for ssa version VER.  */
 832
 833 static inline struct version_info *
 834 ver_info (struct ivopts_data *data, unsigned ver)
 835 {
 836   return data->version_info + ver;
 837 }
 838
 839 /* Returns the info for ssa name NAME.  */
 840
 841 static inline struct version_info *
 842 name_info (struct ivopts_data *data, tree name)
 843 {
 844   return ver_info (data, SSA_NAME_VERSION (name));
 845 }
 846
 847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 848    emitted in LOOP.  */
 849
 850 static bool
 851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 852 {
 853   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 854
 855   gcc_assert (bb);
 856
 857   if (sbb == loop->latch)
 858     return true;
 859
 860   if (sbb != bb)
 861     return false;
 862
 863   return stmt == last_stmt (bb);
 864 }
 865
 866 /* Returns true if STMT if after the place where the original induction
 867    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 868    if the positions are identical.  */
 869
 870 static bool
 871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 872 {
 873   basic_block cand_bb = gimple_bb (cand->incremented_at);
 874   basic_block stmt_bb = gimple_bb (stmt);
 875
 876   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 877     return false;
 878
 879   if (stmt_bb != cand_bb)
 880     return true;
 881
 882   if (true_if_equal
 883       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 884     return true;
 885   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 886 }
 887
 888 /* Returns true if STMT if after the place where the induction variable
 889    CAND is incremented in LOOP.  */
 890
 891 static bool
 892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 893 {
 894   switch (cand->pos)
 895     {
 896     case IP_END:
 897       return false;
 898
 899     case IP_NORMAL:
 900       return stmt_after_ip_normal_pos (loop, stmt);
 901
 902     case IP_ORIGINAL:
 903     case IP_AFTER_USE:
 904       return stmt_after_inc_pos (cand, stmt, false);
 905
 906     case IP_BEFORE_USE:
 907       return stmt_after_inc_pos (cand, stmt, true);
 908
 909     default:
 910       gcc_unreachable ();
 911     }
 912 }
 913
 914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 915
 916 static bool
 917 abnormal_ssa_name_p (tree exp)
 918 {
 919   if (!exp)
 920     return false;
 921
 922   if (TREE_CODE (exp) != SSA_NAME)
 923     return false;
 924
 925   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 926 }
 927
 928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 929    abnormal phi node.  Callback for for_each_index.  */
 930
 931 static bool
 932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 933                                   void *data ATTRIBUTE_UNUSED)
 934 {
 935   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 936     {
 937       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 938         return false;
 939       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 940         return false;
 941     }
 942
 943   return !abnormal_ssa_name_p (*index);
 944 }
 945
 946 /* Returns true if EXPR contains a ssa name that occurs in an
 947    abnormal phi node.  */
 948
 949 bool
 950 contains_abnormal_ssa_name_p (tree expr)
 951 {
 952   enum tree_code code;
 953   enum tree_code_class codeclass;
 954
 955   if (!expr)
 956     return false;
 957
 958   code = TREE_CODE (expr);
 959   codeclass = TREE_CODE_CLASS (code);
 960
 961   if (code == SSA_NAME)
 962     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 963
 964   if (code == INTEGER_CST
 965       || is_gimple_min_invariant (expr))
 966     return false;
 967
 968   if (code == ADDR_EXPR)
 969     return !for_each_index (&TREE_OPERAND (expr, 0),
 970                             idx_contains_abnormal_ssa_name_p,
 971                             NULL);
 972
 973   if (code == COND_EXPR)
 974     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 975       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 976       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 977
 978   switch (codeclass)
 979     {
 980     case tcc_binary:
 981     case tcc_comparison:
 982       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 983         return true;
 984
 985       /* Fallthru.  */
 986     case tcc_unary:
 987       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 988         return true;
 989
 990       break;
 991
 992     default:
 993       gcc_unreachable ();
 994     }
 995
 996   return false;
 997 }
 998
 999 /*  Returns the structure describing number of iterations determined from
1000     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1001
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1004 {
1005   struct tree_niter_desc *desc;
1006   tree_niter_desc **slot;
1007
1008   if (!data->niters)
1009     {
1010       data->niters = new hash_map<edge, tree_niter_desc *>;
1011       slot = NULL;
1012     }
1013   else
1014     slot = data->niters->get (exit);
1015
1016   if (!slot)
1017     {
1018       /* Try to determine number of iterations.  We cannot safely work with ssa
1019          names that appear in phi nodes on abnormal edges, so that we do not
1020          create overlapping life ranges for them (PR 27283).  */
1021       desc = XNEW (struct tree_niter_desc);
1022       if (!number_of_iterations_exit (data->current_loop,
1023                                       exit, desc, true)
1024           || contains_abnormal_ssa_name_p (desc->niter))
1025         {
1026           XDELETE (desc);
1027           desc = NULL;
1028         }
1029       data->niters->put (exit, desc);
1030     }
1031   else
1032     desc = *slot;
1033
1034   return desc;
1035 }
1036
1037 /* Returns the structure describing number of iterations determined from
1038    single dominating exit of DATA->current_loop, or NULL if something
1039    goes wrong.  */
1040
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1043 {
1044   edge exit = single_dom_exit (data->current_loop);
1045
1046   if (!exit)
1047     return NULL;
1048
1049   return niter_for_exit (data, exit);
1050 }
1051
1052 /* Initializes data structures used by the iv optimization pass, stored
1053    in DATA.  */
1054
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1057 {
1058   data->version_info_size = 2 * num_ssa_names;
1059   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060   data->relevant = BITMAP_ALLOC (NULL);
1061   data->important_candidates = BITMAP_ALLOC (NULL);
1062   data->max_inv_id = 0;
1063   data->niters = NULL;
1064   data->vgroups.create (20);
1065   data->vcands.create (20);
1066   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067   data->max_inv_expr_id = 0;
1068   data->name_expansion_cache = NULL;
1069   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070   data->iv_common_cands.create (20);
1071   decl_rtl_to_reset.create (20);
1072   gcc_obstack_init (&data->iv_obstack);
1073 }
1074
1075 /* Returns a memory object to that EXPR points.  In case we are able to
1076    determine that it does not point to any such object, NULL is returned.  */
1077
1078 static tree
1079 determine_base_object (tree expr)
1080 {
1081   enum tree_code code = TREE_CODE (expr);
1082   tree base, obj;
1083
1084   /* If this is a pointer casted to any type, we need to determine
1085      the base object for the pointer; so handle conversions before
1086      throwing away non-pointer expressions.  */
1087   if (CONVERT_EXPR_P (expr))
1088     return determine_base_object (TREE_OPERAND (expr, 0));
1089
1090   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091     return NULL_TREE;
1092
1093   switch (code)
1094     {
1095     case INTEGER_CST:
1096       return NULL_TREE;
1097
1098     case ADDR_EXPR:
1099       obj = TREE_OPERAND (expr, 0);
1100       base = get_base_address (obj);
1101
1102       if (!base)
1103         return expr;
1104
1105       if (TREE_CODE (base) == MEM_REF)
1106         return determine_base_object (TREE_OPERAND (base, 0));
1107
1108       return fold_convert (ptr_type_node,
1109                            build_fold_addr_expr (base));
1110
1111     case POINTER_PLUS_EXPR:
1112       return determine_base_object (TREE_OPERAND (expr, 0));
1113
1114     case PLUS_EXPR:
1115     case MINUS_EXPR:
1116       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1117       gcc_unreachable ();
1118
1119     default:
1120       return fold_convert (ptr_type_node, expr);
1121     }
1122 }
1123
1124 /* Return true if address expression with non-DECL_P operand appears
1125    in EXPR.  */
1126
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1129 {
1130   bool res = false;
1131
1132   STRIP_NOPS (expr);
1133   switch (TREE_CODE (expr))
1134     {
1135     case POINTER_PLUS_EXPR:
1136     case PLUS_EXPR:
1137     case MINUS_EXPR:
1138       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140       break;
1141
1142     case ADDR_EXPR:
1143       return (!DECL_P (TREE_OPERAND (expr, 0)));
1144
1145     default:
1146       return false;
1147     }
1148
1149   return res;
1150 }
1151
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1154
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157           bool no_overflow = false)
1158 {
1159   tree expr = base;
1160   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161                                               sizeof (struct iv));
1162   gcc_assert (step != NULL_TREE);
1163
1164   /* Lower address expression in base except ones with DECL_P as operand.
1165      By doing this:
1166        1) More accurate cost can be computed for address expressions;
1167        2) Duplicate candidates won't be created for bases in different
1168           forms, like &a[0] and &a.  */
1169   STRIP_NOPS (expr);
1170   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171       || contain_complex_addr_expr (expr))
1172     {
1173       aff_tree comb;
1174       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1176     }
1177
1178   iv->base = base;
1179   iv->base_object = determine_base_object (base);
1180   iv->step = step;
1181   iv->biv_p = false;
1182   iv->nonlin_use = NULL;
1183   iv->ssa_name = NULL_TREE;
1184   iv->no_overflow = no_overflow;
1185   iv->have_address_use = false;
1186
1187   return iv;
1188 }
1189
1190 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1191    doesn't overflow.  */
1192
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195         bool no_overflow)
1196 {
1197   struct version_info *info = name_info (data, iv);
1198
1199   gcc_assert (!info->iv);
1200
1201   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202   info->iv = alloc_iv (data, base, step, no_overflow);
1203   info->iv->ssa_name = iv;
1204 }
1205
1206 /* Finds induction variable declaration for VAR.  */
1207
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1210 {
1211   basic_block bb;
1212   tree type = TREE_TYPE (var);
1213
1214   if (!POINTER_TYPE_P (type)
1215       && !INTEGRAL_TYPE_P (type))
1216     return NULL;
1217
1218   if (!name_info (data, var)->iv)
1219     {
1220       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1221
1222       if (!bb
1223           || !flow_bb_inside_loop_p (data->current_loop, bb))
1224         set_iv (data, var, var, build_int_cst (type, 0), true);
1225     }
1226
1227   return name_info (data, var)->iv;
1228 }
1229
1230 /* Return the first non-invariant ssa var found in EXPR.  */
1231
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1234 {
1235   int i, n;
1236   tree tmp;
1237   enum tree_code code;
1238
1239   if (!expr || is_gimple_min_invariant (expr))
1240     return NULL;
1241
1242   code = TREE_CODE (expr);
1243   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1244     {
1245       n = TREE_OPERAND_LENGTH (expr);
1246       for (i = 0; i < n; i++)
1247         {
1248           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1249
1250           if (tmp)
1251             return tmp;
1252         }
1253     }
1254   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1255 }
1256
1257 /* Finds basic ivs.  */
1258
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1261 {
1262   gphi *phi;
1263   affine_iv iv;
1264   tree step, type, base, stop;
1265   bool found = false;
1266   struct loop *loop = data->current_loop;
1267   gphi_iterator psi;
1268
1269   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1270     {
1271       phi = psi.phi ();
1272
1273       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274         continue;
1275
1276       if (virtual_operand_p (PHI_RESULT (phi)))
1277         continue;
1278
1279       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280         continue;
1281
1282       if (integer_zerop (iv.step))
1283         continue;
1284
1285       step = iv.step;
1286       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287       /* Stop expanding iv base at the first ssa var referred by iv step.
1288          Ideally we should stop at any ssa var, because that's expensive
1289          and unusual to happen, we just do it on the first one.
1290
1291          See PR64705 for the rationale.  */
1292       stop = extract_single_var_from_expr (step);
1293       base = expand_simple_operations (base, stop);
1294       if (contains_abnormal_ssa_name_p (base)
1295           || contains_abnormal_ssa_name_p (step))
1296         continue;
1297
1298       type = TREE_TYPE (PHI_RESULT (phi));
1299       base = fold_convert (type, base);
1300       if (step)
1301         {
1302           if (POINTER_TYPE_P (type))
1303             step = convert_to_ptrofftype (step);
1304           else
1305             step = fold_convert (type, step);
1306         }
1307
1308       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309       found = true;
1310     }
1311
1312   return found;
1313 }
1314
1315 /* Marks basic ivs.  */
1316
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1319 {
1320   gphi *phi;
1321   gimple *def;
1322   tree var;
1323   struct iv *iv, *incr_iv;
1324   struct loop *loop = data->current_loop;
1325   basic_block incr_bb;
1326   gphi_iterator psi;
1327
1328   data->bivs_not_used_in_addr = 0;
1329   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1330     {
1331       phi = psi.phi ();
1332
1333       iv = get_iv (data, PHI_RESULT (phi));
1334       if (!iv)
1335         continue;
1336
1337       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338       def = SSA_NAME_DEF_STMT (var);
1339       /* Don't mark iv peeled from other one as biv.  */
1340       if (def
1341           && gimple_code (def) == GIMPLE_PHI
1342           && gimple_bb (def) == loop->header)
1343         continue;
1344
1345       incr_iv = get_iv (data, var);
1346       if (!incr_iv)
1347         continue;
1348
1349       /* If the increment is in the subloop, ignore it.  */
1350       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351       if (incr_bb->loop_father != data->current_loop
1352           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353         continue;
1354
1355       iv->biv_p = true;
1356       incr_iv->biv_p = true;
1357       if (iv->no_overflow)
1358         data->bivs_not_used_in_addr++;
1359       if (incr_iv->no_overflow)
1360         data->bivs_not_used_in_addr++;
1361     }
1362 }
1363
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365    parameters to IV.  */
1366
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1369 {
1370   tree lhs, stop;
1371   struct loop *loop = data->current_loop;
1372
1373   iv->base = NULL_TREE;
1374   iv->step = NULL_TREE;
1375
1376   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377     return false;
1378
1379   lhs = gimple_assign_lhs (stmt);
1380   if (TREE_CODE (lhs) != SSA_NAME)
1381     return false;
1382
1383   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384     return false;
1385
1386   /* Stop expanding iv base at the first ssa var referred by iv step.
1387      Ideally we should stop at any ssa var, because that's expensive
1388      and unusual to happen, we just do it on the first one.
1389
1390      See PR64705 for the rationale.  */
1391   stop = extract_single_var_from_expr (iv->step);
1392   iv->base = expand_simple_operations (iv->base, stop);
1393   if (contains_abnormal_ssa_name_p (iv->base)
1394       || contains_abnormal_ssa_name_p (iv->step))
1395     return false;
1396
1397   /* If STMT could throw, then do not consider STMT as defining a GIV.
1398      While this will suppress optimizations, we can not safely delete this
1399      GIV and associated statements, even if it appears it is not used.  */
1400   if (stmt_could_throw_p (stmt))
1401     return false;
1402
1403   return true;
1404 }
1405
1406 /* Finds general ivs in statement STMT.  */
1407
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1410 {
1411   affine_iv iv;
1412
1413   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414     return;
1415
1416   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1417 }
1418
1419 /* Finds general ivs in basic block BB.  */
1420
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1423 {
1424   gimple_stmt_iterator bsi;
1425
1426   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427     find_givs_in_stmt (data, gsi_stmt (bsi));
1428 }
1429
1430 /* Finds general ivs.  */
1431
1432 static void
1433 find_givs (struct ivopts_data *data)
1434 {
1435   struct loop *loop = data->current_loop;
1436   basic_block *body = get_loop_body_in_dom_order (loop);
1437   unsigned i;
1438
1439   for (i = 0; i < loop->num_nodes; i++)
1440     find_givs_in_bb (data, body[i]);
1441   free (body);
1442 }
1443
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445    variable and if so, its initial value and step.  */
1446
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1449 {
1450   unsigned i;
1451   bitmap_iterator bi;
1452
1453   if (!find_bivs (data))
1454     return false;
1455
1456   find_givs (data);
1457   mark_bivs (data);
1458
1459   if (dump_file && (dump_flags & TDF_DETAILS))
1460     {
1461       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1462
1463       if (niter)
1464         {
1465           fprintf (dump_file, "  number of iterations ");
1466           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467           if (!integer_zerop (niter->may_be_zero))
1468             {
1469               fprintf (dump_file, "; zero if ");
1470               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1471             }
1472           fprintf (dump_file, "\n");
1473         };
1474
1475       fprintf (dump_file, "\n<Induction Vars>:\n");
1476       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1477         {
1478           struct version_info *info = ver_info (data, i);
1479           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1481         }
1482     }
1483
1484   return true;
1485 }
1486
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489    is the const offset stripped from IV base; for other types use, both
1490    are zero by default.  */
1491
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494             gimple *stmt, enum use_type type, tree addr_base,
1495             unsigned HOST_WIDE_INT addr_offset)
1496 {
1497   struct iv_use *use = XCNEW (struct iv_use);
1498
1499   use->id = group->vuses.length ();
1500   use->group_id = group->id;
1501   use->type = type;
1502   use->iv = iv;
1503   use->stmt = stmt;
1504   use->op_p = use_p;
1505   use->addr_base = addr_base;
1506   use->addr_offset = addr_offset;
1507
1508   group->vuses.safe_push (use);
1509   return use;
1510 }
1511
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513    NONLINEAR_USE is true if the invariant is used in a way we do not
1514    handle specially.  */
1515
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1518 {
1519   basic_block bb;
1520   struct version_info *info;
1521
1522   if (TREE_CODE (op) != SSA_NAME
1523       || virtual_operand_p (op))
1524     return;
1525
1526   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527   if (bb
1528       && flow_bb_inside_loop_p (data->current_loop, bb))
1529     return;
1530
1531   info = name_info (data, op);
1532   info->name = op;
1533   info->has_nonlin_use |= nonlinear_use;
1534   if (!info->inv_id)
1535     info->inv_id = ++data->max_inv_id;
1536   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1537 }
1538
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1541
1542 /* Record a group of TYPE.  */
1543
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1546 {
1547   struct iv_group *group = XCNEW (struct iv_group);
1548
1549   group->id = data->vgroups.length ();
1550   group->type = type;
1551   group->related_cands = BITMAP_ALLOC (NULL);
1552   group->vuses.create (1);
1553
1554   data->vgroups.safe_push (group);
1555   return group;
1556 }
1557
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559    New group will be created if there is no existing group for the use.  */
1560
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563                   struct iv *iv, gimple *stmt, enum use_type type)
1564 {
1565   tree addr_base = NULL;
1566   struct iv_group *group = NULL;
1567   unsigned HOST_WIDE_INT addr_offset = 0;
1568
1569   /* Record non address type use in a new group.  */
1570   if (type == USE_ADDRESS && iv->base_object)
1571     {
1572       unsigned int i;
1573
1574       addr_base = strip_offset (iv->base, &addr_offset);
1575       for (i = 0; i < data->vgroups.length (); i++)
1576         {
1577           struct iv_use *use;
1578
1579           group = data->vgroups[i];
1580           use = group->vuses[0];
1581           if (use->type != USE_ADDRESS || !use->iv->base_object)
1582             continue;
1583
1584           /* Check if it has the same stripped base and step.  */
1585           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586               && operand_equal_p (iv->step, use->iv->step, 0)
1587               && operand_equal_p (addr_base, use->addr_base, 0))
1588             break;
1589         }
1590       if (i == data->vgroups.length ())
1591         group = NULL;
1592     }
1593
1594   if (!group)
1595     group = record_group (data, type);
1596
1597   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1598 }
1599
1600 /* Checks whether the use OP is interesting and if so, records it.  */
1601
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1604 {
1605   struct iv *iv;
1606   gimple *stmt;
1607   struct iv_use *use;
1608
1609   if (TREE_CODE (op) != SSA_NAME)
1610     return NULL;
1611
1612   iv = get_iv (data, op);
1613   if (!iv)
1614     return NULL;
1615
1616   if (iv->nonlin_use)
1617     {
1618       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619       return iv->nonlin_use;
1620     }
1621
1622   if (integer_zerop (iv->step))
1623     {
1624       record_invariant (data, op, true);
1625       return NULL;
1626     }
1627
1628   stmt = SSA_NAME_DEF_STMT (op);
1629   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1630
1631   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632   iv->nonlin_use = use;
1633   return use;
1634 }
1635
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637    of an induction variable and an invariant.  If this is the case,
1638    CONTROL_VAR is set to location of the iv, BOUND to the location of
1639    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640    induction variable descriptions, and true is returned.  If this is not
1641    the case, CONTROL_VAR and BOUND are set to the arguments of the
1642    condition and false is returned.  */
1643
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646                        tree **control_var, tree **bound,
1647                        struct iv **iv_var, struct iv **iv_bound)
1648 {
1649   /* The objects returned when COND has constant operands.  */
1650   static struct iv const_iv;
1651   static tree zero;
1652   tree *op0 = &zero, *op1 = &zero;
1653   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654   bool ret = false;
1655
1656   if (gimple_code (stmt) == GIMPLE_COND)
1657     {
1658       gcond *cond_stmt = as_a <gcond *> (stmt);
1659       op0 = gimple_cond_lhs_ptr (cond_stmt);
1660       op1 = gimple_cond_rhs_ptr (cond_stmt);
1661     }
1662   else
1663     {
1664       op0 = gimple_assign_rhs1_ptr (stmt);
1665       op1 = gimple_assign_rhs2_ptr (stmt);
1666     }
1667
1668   zero = integer_zero_node;
1669   const_iv.step = integer_zero_node;
1670
1671   if (TREE_CODE (*op0) == SSA_NAME)
1672     iv0 = get_iv (data, *op0);
1673   if (TREE_CODE (*op1) == SSA_NAME)
1674     iv1 = get_iv (data, *op1);
1675
1676   /* Exactly one of the compared values must be an iv, and the other one must
1677      be an invariant.  */
1678   if (!iv0 || !iv1)
1679     goto end;
1680
1681   if (integer_zerop (iv0->step))
1682     {
1683       /* Control variable may be on the other side.  */
1684       std::swap (op0, op1);
1685       std::swap (iv0, iv1);
1686     }
1687   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1688
1689 end:
1690   if (control_var)
1691     *control_var = op0;
1692   if (iv_var)
1693     *iv_var = iv0;
1694   if (bound)
1695     *bound = op1;
1696   if (iv_bound)
1697     *iv_bound = iv1;
1698
1699   return ret;
1700 }
1701
1702 /* Checks whether the condition in STMT is interesting and if so,
1703    records it.  */
1704
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1707 {
1708   tree *var_p, *bound_p;
1709   struct iv *var_iv;
1710
1711   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1712     {
1713       find_interesting_uses_op (data, *var_p);
1714       find_interesting_uses_op (data, *bound_p);
1715       return;
1716     }
1717
1718   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1719 }
1720
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722    relative to the loop LOOP, i.e. if all its operands are defined
1723    outside of the returned loop.  Returns NULL if EXPR is not
1724    even obviously invariant in LOOP.  */
1725
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1728 {
1729   basic_block def_bb;
1730   unsigned i, len;
1731
1732   if (is_gimple_min_invariant (expr))
1733     return current_loops->tree_root;
1734
1735   if (TREE_CODE (expr) == SSA_NAME)
1736     {
1737       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738       if (def_bb)
1739         {
1740           if (flow_bb_inside_loop_p (loop, def_bb))
1741             return NULL;
1742           return superloop_at_depth (loop,
1743                                      loop_depth (def_bb->loop_father) + 1);
1744         }
1745
1746       return current_loops->tree_root;
1747     }
1748
1749   if (!EXPR_P (expr))
1750     return NULL;
1751
1752   unsigned maxdepth = 0;
1753   len = TREE_OPERAND_LENGTH (expr);
1754   for (i = 0; i < len; i++)
1755     {
1756       struct loop *ivloop;
1757       if (!TREE_OPERAND (expr, i))
1758         continue;
1759
1760       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761       if (!ivloop)
1762         return NULL;
1763       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1764     }
1765
1766   return superloop_at_depth (loop, maxdepth);
1767 }
1768
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770    i.e. if all its operands are defined outside of the LOOP.  LOOP
1771    should not be the function body.  */
1772
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1775 {
1776   basic_block def_bb;
1777   unsigned i, len;
1778
1779   gcc_assert (loop_depth (loop) > 0);
1780
1781   if (is_gimple_min_invariant (expr))
1782     return true;
1783
1784   if (TREE_CODE (expr) == SSA_NAME)
1785     {
1786       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787       if (def_bb
1788           && flow_bb_inside_loop_p (loop, def_bb))
1789         return false;
1790
1791       return true;
1792     }
1793
1794   if (!EXPR_P (expr))
1795     return false;
1796
1797   len = TREE_OPERAND_LENGTH (expr);
1798   for (i = 0; i < len; i++)
1799     if (TREE_OPERAND (expr, i)
1800         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801       return false;
1802
1803   return true;
1804 }
1805
1806 /* Given expression EXPR which computes inductive values with respect
1807    to loop recorded in DATA, this function returns biv from which EXPR
1808    is derived by tracing definition chains of ssa variables in EXPR.  */
1809
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1812 {
1813   struct iv *iv;
1814   unsigned i, n;
1815   tree e2, e1;
1816   enum tree_code code;
1817   gimple *stmt;
1818
1819   if (expr == NULL_TREE)
1820     return NULL;
1821
1822   if (is_gimple_min_invariant (expr))
1823     return NULL;
1824
1825   code = TREE_CODE (expr);
1826   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1827     {
1828       n = TREE_OPERAND_LENGTH (expr);
1829       for (i = 0; i < n; i++)
1830         {
1831           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832           if (iv)
1833             return iv;
1834         }
1835     }
1836
1837   /* Stop if it's not ssa name.  */
1838   if (code != SSA_NAME)
1839     return NULL;
1840
1841   iv = get_iv (data, expr);
1842   if (!iv || integer_zerop (iv->step))
1843     return NULL;
1844   else if (iv->biv_p)
1845     return iv;
1846
1847   stmt = SSA_NAME_DEF_STMT (expr);
1848   if (gphi *phi = dyn_cast <gphi *> (stmt))
1849     {
1850       ssa_op_iter iter;
1851       use_operand_p use_p;
1852
1853       if (virtual_operand_p (gimple_phi_result (phi)))
1854         return NULL;
1855
1856       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1857         {
1858           tree use = USE_FROM_PTR (use_p);
1859           iv = find_deriving_biv_for_expr (data, use);
1860           if (iv)
1861             return iv;
1862         }
1863       return NULL;
1864     }
1865   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866     return NULL;
1867
1868   e1 = gimple_assign_rhs1 (stmt);
1869   code = gimple_assign_rhs_code (stmt);
1870   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871     return find_deriving_biv_for_expr (data, e1);
1872
1873   switch (code)
1874     {
1875     case MULT_EXPR:
1876     case PLUS_EXPR:
1877     case MINUS_EXPR:
1878     case POINTER_PLUS_EXPR:
1879       /* Increments, decrements and multiplications by a constant
1880          are simple.  */
1881       e2 = gimple_assign_rhs2 (stmt);
1882       iv = find_deriving_biv_for_expr (data, e2);
1883       if (iv)
1884         return iv;
1885
1886       /* Fallthru.  */
1887     CASE_CONVERT:
1888       /* Casts are simple.  */
1889       return find_deriving_biv_for_expr (data, e1);
1890
1891     default:
1892       break;
1893     }
1894
1895   return NULL;
1896 }
1897
1898 /* Record BIV, its predecessor and successor that they are used in
1899    address type uses.  */
1900
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1903 {
1904   unsigned i;
1905   tree type, base_1, base_2;
1906   bitmap_iterator bi;
1907
1908   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909       || biv->have_address_use || !biv->no_overflow)
1910     return;
1911
1912   type = TREE_TYPE (biv->base);
1913   if (!INTEGRAL_TYPE_P (type))
1914     return;
1915
1916   biv->have_address_use = true;
1917   data->bivs_not_used_in_addr--;
1918   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1920     {
1921       struct iv *iv = ver_info (data, i)->iv;
1922
1923       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924           || iv->have_address_use || !iv->no_overflow)
1925         continue;
1926
1927       if (type != TREE_TYPE (iv->base)
1928           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929         continue;
1930
1931       if (!operand_equal_p (biv->step, iv->step, 0))
1932         continue;
1933
1934       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935       if (operand_equal_p (base_1, iv->base, 0)
1936           || operand_equal_p (base_2, biv->base, 0))
1937         {
1938           iv->have_address_use = true;
1939           data->bivs_not_used_in_addr--;
1940         }
1941     }
1942 }
1943
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945    initial ones.  Returns false when the value of the index cannot be determined.
1946    Callback for for_each_index.  */
1947
1948 struct ifs_ivopts_data
1949 {
1950   struct ivopts_data *ivopts_data;
1951   gimple *stmt;
1952   tree step;
1953 };
1954
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1957 {
1958   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959   struct iv *iv;
1960   bool use_overflow_semantics = false;
1961   tree step, iv_base, iv_step, lbound, off;
1962   struct loop *loop = dta->ivopts_data->current_loop;
1963
1964   /* If base is a component ref, require that the offset of the reference
1965      be invariant.  */
1966   if (TREE_CODE (base) == COMPONENT_REF)
1967     {
1968       off = component_ref_field_offset (base);
1969       return expr_invariant_in_loop_p (loop, off);
1970     }
1971
1972   /* If base is array, first check whether we will be able to move the
1973      reference out of the loop (in order to take its address in strength
1974      reduction).  In order for this to work we need both lower bound
1975      and step to be loop invariants.  */
1976   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1977     {
1978       /* Moreover, for a range, the size needs to be invariant as well.  */
1979       if (TREE_CODE (base) == ARRAY_RANGE_REF
1980           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981         return false;
1982
1983       step = array_ref_element_size (base);
1984       lbound = array_ref_low_bound (base);
1985
1986       if (!expr_invariant_in_loop_p (loop, step)
1987           || !expr_invariant_in_loop_p (loop, lbound))
1988         return false;
1989     }
1990
1991   if (TREE_CODE (*idx) != SSA_NAME)
1992     return true;
1993
1994   iv = get_iv (dta->ivopts_data, *idx);
1995   if (!iv)
1996     return false;
1997
1998   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1999           *&x[0], which is not folded and does not trigger the
2000           ARRAY_REF path below.  */
2001   *idx = iv->base;
2002
2003   if (integer_zerop (iv->step))
2004     return true;
2005
2006   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2007     {
2008       step = array_ref_element_size (base);
2009
2010       /* We only handle addresses whose step is an integer constant.  */
2011       if (TREE_CODE (step) != INTEGER_CST)
2012         return false;
2013     }
2014   else
2015     /* The step for pointer arithmetics already is 1 byte.  */
2016     step = size_one_node;
2017
2018   iv_base = iv->base;
2019   iv_step = iv->step;
2020   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021     use_overflow_semantics = true;
2022
2023   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024                             sizetype, &iv_base, &iv_step, dta->stmt,
2025                             use_overflow_semantics))
2026     {
2027       /* The index might wrap.  */
2028       return false;
2029     }
2030
2031   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2033
2034   if (dta->ivopts_data->bivs_not_used_in_addr)
2035     {
2036       if (!iv->biv_p)
2037         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2038
2039       record_biv_for_address_use (dta->ivopts_data, iv);
2040     }
2041   return true;
2042 }
2043
2044 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2045    object is passed to it in DATA.  */
2046
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049                 void *vdata)
2050 {
2051   struct ivopts_data *data = (struct ivopts_data *) vdata;
2052   find_interesting_uses_op (data, *idx);
2053   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054     {
2055       find_interesting_uses_op (data, array_ref_element_size (base));
2056       find_interesting_uses_op (data, array_ref_low_bound (base));
2057     }
2058   return true;
2059 }
2060
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062    store cst to MUL and return true.  Otherwise return false.
2063    The returned value is always sign-extended, regardless of the
2064    signedness of TOP and BOT.  */
2065
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2068 {
2069   tree mby;
2070   enum tree_code code;
2071   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072   widest_int res, p0, p1;
2073
2074   STRIP_NOPS (top);
2075   STRIP_NOPS (bot);
2076
2077   if (operand_equal_p (top, bot, 0))
2078     {
2079       *mul = 1;
2080       return true;
2081     }
2082
2083   code = TREE_CODE (top);
2084   switch (code)
2085     {
2086     case MULT_EXPR:
2087       mby = TREE_OPERAND (top, 1);
2088       if (TREE_CODE (mby) != INTEGER_CST)
2089         return false;
2090
2091       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092         return false;
2093
2094       *mul = wi::sext (res * wi::to_widest (mby), precision);
2095       return true;
2096
2097     case PLUS_EXPR:
2098     case MINUS_EXPR:
2099       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101         return false;
2102
2103       if (code == MINUS_EXPR)
2104         p1 = -p1;
2105       *mul = wi::sext (p0 + p1, precision);
2106       return true;
2107
2108     case INTEGER_CST:
2109       if (TREE_CODE (bot) != INTEGER_CST)
2110         return false;
2111
2112       p0 = widest_int::from (top, SIGNED);
2113       p1 = widest_int::from (bot, SIGNED);
2114       if (p1 == 0)
2115         return false;
2116       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117       return res == 0;
2118
2119     default:
2120       return false;
2121     }
2122 }
2123
2124 /* Return true if memory reference REF with step STEP may be unaligned.  */
2125
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2128 {
2129   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130      thus they are not misaligned.  */
2131   if (TREE_CODE (ref) == TARGET_MEM_REF)
2132     return false;
2133
2134   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2137
2138   unsigned HOST_WIDE_INT bitpos;
2139   unsigned int ref_align;
2140   get_object_alignment_1 (ref, &ref_align, &bitpos);
2141   if (ref_align < align
2142       || (bitpos % align) != 0
2143       || (bitpos % BITS_PER_UNIT) != 0)
2144     return true;
2145
2146   unsigned int trailing_zeros = tree_ctz (step);
2147   if (trailing_zeros < HOST_BITS_PER_INT
2148       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149     return true;
2150
2151   return false;
2152 }
2153
2154 /* Return true if EXPR may be non-addressable.   */
2155
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2158 {
2159   switch (TREE_CODE (expr))
2160     {
2161     case TARGET_MEM_REF:
2162       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163          target, thus they are always addressable.  */
2164       return false;
2165
2166     case MEM_REF:
2167       /* Likewise for MEM_REFs, modulo the storage order.  */
2168       return REF_REVERSE_STORAGE_ORDER (expr);
2169
2170     case BIT_FIELD_REF:
2171       if (REF_REVERSE_STORAGE_ORDER (expr))
2172         return true;
2173       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2174
2175     case COMPONENT_REF:
2176       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177         return true;
2178       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2180
2181     case ARRAY_REF:
2182     case ARRAY_RANGE_REF:
2183       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184         return true;
2185       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2186
2187     case VIEW_CONVERT_EXPR:
2188       /* This kind of view-conversions may wrap non-addressable objects
2189          and make them look addressable.  After some processing the
2190          non-addressability may be uncovered again, causing ADDR_EXPRs
2191          of inappropriate objects to be built.  */
2192       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194         return true;
2195       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2196
2197     CASE_CONVERT:
2198       return true;
2199
2200     default:
2201       break;
2202     }
2203
2204   return false;
2205 }
2206
2207 /* Finds addresses in *OP_P inside STMT.  */
2208
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211                                tree *op_p)
2212 {
2213   tree base = *op_p, step = size_zero_node;
2214   struct iv *civ;
2215   struct ifs_ivopts_data ifs_ivopts_data;
2216
2217   /* Do not play with volatile memory references.  A bit too conservative,
2218      perhaps, but safe.  */
2219   if (gimple_has_volatile_ops (stmt))
2220     goto fail;
2221
2222   /* Ignore bitfields for now.  Not really something terribly complicated
2223      to handle.  TODO.  */
2224   if (TREE_CODE (base) == BIT_FIELD_REF)
2225     goto fail;
2226
2227   base = unshare_expr (base);
2228
2229   if (TREE_CODE (base) == TARGET_MEM_REF)
2230     {
2231       tree type = build_pointer_type (TREE_TYPE (base));
2232       tree astep;
2233
2234       if (TMR_BASE (base)
2235           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2236         {
2237           civ = get_iv (data, TMR_BASE (base));
2238           if (!civ)
2239             goto fail;
2240
2241           TMR_BASE (base) = civ->base;
2242           step = civ->step;
2243         }
2244       if (TMR_INDEX2 (base)
2245           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2246         {
2247           civ = get_iv (data, TMR_INDEX2 (base));
2248           if (!civ)
2249             goto fail;
2250
2251           TMR_INDEX2 (base) = civ->base;
2252           step = civ->step;
2253         }
2254       if (TMR_INDEX (base)
2255           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2256         {
2257           civ = get_iv (data, TMR_INDEX (base));
2258           if (!civ)
2259             goto fail;
2260
2261           TMR_INDEX (base) = civ->base;
2262           astep = civ->step;
2263
2264           if (astep)
2265             {
2266               if (TMR_STEP (base))
2267                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2268
2269               step = fold_build2 (PLUS_EXPR, type, step, astep);
2270             }
2271         }
2272
2273       if (integer_zerop (step))
2274         goto fail;
2275       base = tree_mem_ref_addr (type, base);
2276     }
2277   else
2278     {
2279       ifs_ivopts_data.ivopts_data = data;
2280       ifs_ivopts_data.stmt = stmt;
2281       ifs_ivopts_data.step = size_zero_node;
2282       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283           || integer_zerop (ifs_ivopts_data.step))
2284         goto fail;
2285       step = ifs_ivopts_data.step;
2286
2287       /* Check that the base expression is addressable.  This needs
2288          to be done after substituting bases of IVs into it.  */
2289       if (may_be_nonaddressable_p (base))
2290         goto fail;
2291
2292       /* Moreover, on strict alignment platforms, check that it is
2293          sufficiently aligned.  */
2294       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295         goto fail;
2296
2297       base = build_fold_addr_expr (base);
2298
2299       /* Substituting bases of IVs into the base expression might
2300          have caused folding opportunities.  */
2301       if (TREE_CODE (base) == ADDR_EXPR)
2302         {
2303           tree *ref = &TREE_OPERAND (base, 0);
2304           while (handled_component_p (*ref))
2305             ref = &TREE_OPERAND (*ref, 0);
2306           if (TREE_CODE (*ref) == MEM_REF)
2307             {
2308               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309                                       TREE_OPERAND (*ref, 0),
2310                                       TREE_OPERAND (*ref, 1));
2311               if (tem)
2312                 *ref = tem;
2313             }
2314         }
2315     }
2316
2317   civ = alloc_iv (data, base, step);
2318   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319   return;
2320
2321 fail:
2322   for_each_index (op_p, idx_record_use, data);
2323 }
2324
2325 /* Finds and records invariants used in STMT.  */
2326
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2329 {
2330   ssa_op_iter iter;
2331   use_operand_p use_p;
2332   tree op;
2333
2334   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2335     {
2336       op = USE_FROM_PTR (use_p);
2337       record_invariant (data, op, false);
2338     }
2339 }
2340
2341 /* Finds interesting uses of induction variables in the statement STMT.  */
2342
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2345 {
2346   struct iv *iv;
2347   tree op, *lhs, *rhs;
2348   ssa_op_iter iter;
2349   use_operand_p use_p;
2350   enum tree_code code;
2351
2352   find_invariants_stmt (data, stmt);
2353
2354   if (gimple_code (stmt) == GIMPLE_COND)
2355     {
2356       find_interesting_uses_cond (data, stmt);
2357       return;
2358     }
2359
2360   if (is_gimple_assign (stmt))
2361     {
2362       lhs = gimple_assign_lhs_ptr (stmt);
2363       rhs = gimple_assign_rhs1_ptr (stmt);
2364
2365       if (TREE_CODE (*lhs) == SSA_NAME)
2366         {
2367           /* If the statement defines an induction variable, the uses are not
2368              interesting by themselves.  */
2369
2370           iv = get_iv (data, *lhs);
2371
2372           if (iv && !integer_zerop (iv->step))
2373             return;
2374         }
2375
2376       code = gimple_assign_rhs_code (stmt);
2377       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378           && (REFERENCE_CLASS_P (*rhs)
2379               || is_gimple_val (*rhs)))
2380         {
2381           if (REFERENCE_CLASS_P (*rhs))
2382             find_interesting_uses_address (data, stmt, rhs);
2383           else
2384             find_interesting_uses_op (data, *rhs);
2385
2386           if (REFERENCE_CLASS_P (*lhs))
2387             find_interesting_uses_address (data, stmt, lhs);
2388           return;
2389         }
2390       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2391         {
2392           find_interesting_uses_cond (data, stmt);
2393           return;
2394         }
2395
2396       /* TODO -- we should also handle address uses of type
2397
2398          memory = call (whatever);
2399
2400          and
2401
2402          call (memory).  */
2403     }
2404
2405   if (gimple_code (stmt) == GIMPLE_PHI
2406       && gimple_bb (stmt) == data->current_loop->header)
2407     {
2408       iv = get_iv (data, PHI_RESULT (stmt));
2409
2410       if (iv && !integer_zerop (iv->step))
2411         return;
2412     }
2413
2414   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2415     {
2416       op = USE_FROM_PTR (use_p);
2417
2418       if (TREE_CODE (op) != SSA_NAME)
2419         continue;
2420
2421       iv = get_iv (data, op);
2422       if (!iv)
2423         continue;
2424
2425       find_interesting_uses_op (data, op);
2426     }
2427 }
2428
2429 /* Finds interesting uses of induction variables outside of loops
2430    on loop exit edge EXIT.  */
2431
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2434 {
2435   gphi *phi;
2436   gphi_iterator psi;
2437   tree def;
2438
2439   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2440     {
2441       phi = psi.phi ();
2442       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443       if (!virtual_operand_p (def))
2444         find_interesting_uses_op (data, def);
2445     }
2446 }
2447
2448 /* Compute maximum offset of [base + offset] addressing mode
2449    for memory reference represented by USE.  */
2450
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2453 {
2454   int width;
2455   rtx reg, addr;
2456   HOST_WIDE_INT i, off;
2457   unsigned list_index, num;
2458   addr_space_t as;
2459   machine_mode mem_mode, addr_mode;
2460   static vec<HOST_WIDE_INT> max_offset_list;
2461
2462   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2464
2465   num = max_offset_list.length ();
2466   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467   if (list_index >= num)
2468     {
2469       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470       for (; num < max_offset_list.length (); num++)
2471         max_offset_list[num] = -1;
2472     }
2473
2474   off = max_offset_list[list_index];
2475   if (off != -1)
2476     return off;
2477
2478   addr_mode = targetm.addr_space.address_mode (as);
2479   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2481
2482   width = GET_MODE_BITSIZE (addr_mode) - 1;
2483   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484     width = HOST_BITS_PER_WIDE_INT - 1;
2485
2486   for (i = width; i > 0; i--)
2487     {
2488       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490       if (memory_address_addr_space_p (mem_mode, addr, as))
2491         break;
2492
2493       /* For some strict-alignment targets, the offset must be naturally
2494          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2495       off = ((unsigned HOST_WIDE_INT) 1 << i);
2496       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2497         {
2498           off -= GET_MODE_SIZE (mem_mode);
2499           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500           if (memory_address_addr_space_p (mem_mode, addr, as))
2501             break;
2502         }
2503     }
2504   if (i == 0)
2505     off = 0;
2506
2507   max_offset_list[list_index] = off;
2508   return off;
2509 }
2510
2511 /* Comparison function to sort group in ascending order of addr_offset.  */
2512
2513 static int
2514 group_compare_offset (const void *a, const void *b)
2515 {
2516   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2518
2519   if ((*u1)->addr_offset != (*u2)->addr_offset)
2520     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521   else
2522     return 0;
2523 }
2524
2525 /* Check if small groups should be split.  Return true if no group
2526    contains more than two uses with distinct addr_offsets.  Return
2527    false otherwise.  We want to split such groups because:
2528
2529      1) Small groups don't have much benefit and may interfer with
2530         general candidate selection.
2531      2) Size for problem with only small groups is usually small and
2532         general algorithm can handle it well.
2533
2534    TODO -- Above claim may not hold when we want to merge memory
2535    accesses with conseuctive addresses.  */
2536
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2539 {
2540   unsigned int i, j, distinct = 1;
2541   struct iv_use *pre;
2542   struct iv_group *group;
2543
2544   for (i = 0; i < data->vgroups.length (); i++)
2545     {
2546       group = data->vgroups[i];
2547       if (group->vuses.length () == 1)
2548         continue;
2549
2550       gcc_assert (group->type == USE_ADDRESS);
2551       if (group->vuses.length () == 2)
2552         {
2553           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554             std::swap (group->vuses[0], group->vuses[1]);
2555         }
2556       else
2557         group->vuses.qsort (group_compare_offset);
2558
2559       if (distinct > 2)
2560         continue;
2561
2562       distinct = 1;
2563       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2564         {
2565           if (group->vuses[j]->addr_offset != pre->addr_offset)
2566             {
2567               pre = group->vuses[j];
2568               distinct++;
2569             }
2570
2571           if (distinct > 2)
2572             break;
2573         }
2574     }
2575
2576   return (distinct <= 2);
2577 }
2578
2579 /* For each group of address type uses, this function further groups
2580    these uses according to the maximum offset supported by target's
2581    [base + offset] addressing mode.  */
2582
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2585 {
2586   unsigned int i, j;
2587   HOST_WIDE_INT max_offset = -1;
2588
2589   /* Reset max offset to split all small groups.  */
2590   if (split_small_address_groups_p (data))
2591     max_offset = 0;
2592
2593   for (i = 0; i < data->vgroups.length (); i++)
2594     {
2595       struct iv_group *group = data->vgroups[i];
2596       struct iv_use *use = group->vuses[0];
2597
2598       use->id = 0;
2599       use->group_id = group->id;
2600       if (group->vuses.length () == 1)
2601         continue;
2602
2603       if (max_offset != 0)
2604         max_offset = compute_max_addr_offset (use);
2605
2606       for (j = 1; j < group->vuses.length (); j++)
2607         {
2608           struct iv_use *next = group->vuses[j];
2609
2610           /* Only uses with offset that can fit in offset part against
2611              the first use can be grouped together.  */
2612           if (next->addr_offset - use->addr_offset
2613               > (unsigned HOST_WIDE_INT) max_offset)
2614             break;
2615
2616           next->id = j;
2617           next->group_id = group->id;
2618         }
2619       /* Split group.  */
2620       if (j < group->vuses.length ())
2621         {
2622           struct iv_group *new_group = record_group (data, group->type);
2623           new_group->vuses.safe_splice (group->vuses);
2624           new_group->vuses.block_remove (0, j);
2625           group->vuses.truncate (j);
2626         }
2627     }
2628 }
2629
2630 /* Finds uses of the induction variables that are interesting.  */
2631
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2634 {
2635   basic_block bb;
2636   gimple_stmt_iterator bsi;
2637   basic_block *body = get_loop_body (data->current_loop);
2638   unsigned i;
2639   edge e;
2640
2641   for (i = 0; i < data->current_loop->num_nodes; i++)
2642     {
2643       edge_iterator ei;
2644       bb = body[i];
2645
2646       FOR_EACH_EDGE (e, ei, bb->succs)
2647         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2649           find_interesting_uses_outside (data, e);
2650
2651       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2653       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654         if (!is_gimple_debug (gsi_stmt (bsi)))
2655           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2656     }
2657
2658   split_address_groups (data);
2659
2660   if (dump_file && (dump_flags & TDF_DETAILS))
2661     {
2662       bitmap_iterator bi;
2663
2664       fprintf (dump_file, "\n<Invariant Vars>:\n");
2665       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2666         {
2667           struct version_info *info = ver_info (data, i);
2668           if (info->inv_id)
2669             {
2670               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671               print_generic_expr (dump_file, info->name, TDF_SLIM);
2672               fprintf (dump_file, "%s\n",
2673                        info->has_nonlin_use ? "" : "\t(eliminable)");
2674             }
2675         }
2676
2677       fprintf (dump_file, "\n<IV Groups>:\n");
2678       dump_groups (dump_file, data);
2679       fprintf (dump_file, "\n");
2680     }
2681
2682   free (body);
2683 }
2684
2685 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2686    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2687    we are at the top-level of the processed address.  */
2688
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691                 HOST_WIDE_INT *offset)
2692 {
2693   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694   enum tree_code code;
2695   tree type, orig_type = TREE_TYPE (expr);
2696   HOST_WIDE_INT off0, off1, st;
2697   tree orig_expr = expr;
2698
2699   STRIP_NOPS (expr);
2700
2701   type = TREE_TYPE (expr);
2702   code = TREE_CODE (expr);
2703   *offset = 0;
2704
2705   switch (code)
2706     {
2707     case INTEGER_CST:
2708       if (!cst_and_fits_in_hwi (expr)
2709           || integer_zerop (expr))
2710         return orig_expr;
2711
2712       *offset = int_cst_value (expr);
2713       return build_int_cst (orig_type, 0);
2714
2715     case POINTER_PLUS_EXPR:
2716     case PLUS_EXPR:
2717     case MINUS_EXPR:
2718       op0 = TREE_OPERAND (expr, 0);
2719       op1 = TREE_OPERAND (expr, 1);
2720
2721       op0 = strip_offset_1 (op0, false, false, &off0);
2722       op1 = strip_offset_1 (op1, false, false, &off1);
2723
2724       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725       if (op0 == TREE_OPERAND (expr, 0)
2726           && op1 == TREE_OPERAND (expr, 1))
2727         return orig_expr;
2728
2729       if (integer_zerop (op1))
2730         expr = op0;
2731       else if (integer_zerop (op0))
2732         {
2733           if (code == MINUS_EXPR)
2734             expr = fold_build1 (NEGATE_EXPR, type, op1);
2735           else
2736             expr = op1;
2737         }
2738       else
2739         expr = fold_build2 (code, type, op0, op1);
2740
2741       return fold_convert (orig_type, expr);
2742
2743     case MULT_EXPR:
2744       op1 = TREE_OPERAND (expr, 1);
2745       if (!cst_and_fits_in_hwi (op1))
2746         return orig_expr;
2747
2748       op0 = TREE_OPERAND (expr, 0);
2749       op0 = strip_offset_1 (op0, false, false, &off0);
2750       if (op0 == TREE_OPERAND (expr, 0))
2751         return orig_expr;
2752
2753       *offset = off0 * int_cst_value (op1);
2754       if (integer_zerop (op0))
2755         expr = op0;
2756       else
2757         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2758
2759       return fold_convert (orig_type, expr);
2760
2761     case ARRAY_REF:
2762     case ARRAY_RANGE_REF:
2763       if (!inside_addr)
2764         return orig_expr;
2765
2766       step = array_ref_element_size (expr);
2767       if (!cst_and_fits_in_hwi (step))
2768         break;
2769
2770       st = int_cst_value (step);
2771       op1 = TREE_OPERAND (expr, 1);
2772       op1 = strip_offset_1 (op1, false, false, &off1);
2773       *offset = off1 * st;
2774
2775       if (top_compref
2776           && integer_zerop (op1))
2777         {
2778           /* Strip the component reference completely.  */
2779           op0 = TREE_OPERAND (expr, 0);
2780           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781           *offset += off0;
2782           return op0;
2783         }
2784       break;
2785
2786     case COMPONENT_REF:
2787       {
2788         tree field;
2789
2790         if (!inside_addr)
2791           return orig_expr;
2792
2793         tmp = component_ref_field_offset (expr);
2794         field = TREE_OPERAND (expr, 1);
2795         if (top_compref
2796             && cst_and_fits_in_hwi (tmp)
2797             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2798           {
2799             HOST_WIDE_INT boffset, abs_off;
2800
2801             /* Strip the component reference completely.  */
2802             op0 = TREE_OPERAND (expr, 0);
2803             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806             if (boffset < 0)
2807               abs_off = -abs_off;
2808
2809             *offset = off0 + int_cst_value (tmp) + abs_off;
2810             return op0;
2811           }
2812       }
2813       break;
2814
2815     case ADDR_EXPR:
2816       op0 = TREE_OPERAND (expr, 0);
2817       op0 = strip_offset_1 (op0, true, true, &off0);
2818       *offset += off0;
2819
2820       if (op0 == TREE_OPERAND (expr, 0))
2821         return orig_expr;
2822
2823       expr = build_fold_addr_expr (op0);
2824       return fold_convert (orig_type, expr);
2825
2826     case MEM_REF:
2827       /* ???  Offset operand?  */
2828       inside_addr = false;
2829       break;
2830
2831     default:
2832       return orig_expr;
2833     }
2834
2835   /* Default handling of expressions for that we want to recurse into
2836      the first operand.  */
2837   op0 = TREE_OPERAND (expr, 0);
2838   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839   *offset += off0;
2840
2841   if (op0 == TREE_OPERAND (expr, 0)
2842       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843     return orig_expr;
2844
2845   expr = copy_node (expr);
2846   TREE_OPERAND (expr, 0) = op0;
2847   if (op1)
2848     TREE_OPERAND (expr, 1) = op1;
2849
2850   /* Inside address, we might strip the top level component references,
2851      thus changing type of the expression.  Handling of ADDR_EXPR
2852      will fix that.  */
2853   expr = fold_convert (orig_type, expr);
2854
2855   return expr;
2856 }
2857
2858 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2859
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2862 {
2863   HOST_WIDE_INT off;
2864   tree core = strip_offset_1 (expr, false, false, &off);
2865   *offset = off;
2866   return core;
2867 }
2868
2869 /* Returns variant of TYPE that can be used as base for different uses.
2870    We return unsigned type with the same precision, which avoids problems
2871    with overflows.  */
2872
2873 static tree
2874 generic_type_for (tree type)
2875 {
2876   if (POINTER_TYPE_P (type))
2877     return unsigned_type_for (type);
2878
2879   if (TYPE_UNSIGNED (type))
2880     return type;
2881
2882   return unsigned_type_for (type);
2883 }
2884
2885 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2886    the bitmap to that we should store it.  */
2887
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2891 {
2892   bitmap *depends_on = (bitmap *) data;
2893   struct version_info *info;
2894
2895   if (TREE_CODE (*expr_p) != SSA_NAME)
2896     return NULL_TREE;
2897   info = name_info (fd_ivopts_data, *expr_p);
2898
2899   if (!info->inv_id || info->has_nonlin_use)
2900     return NULL_TREE;
2901
2902   if (!*depends_on)
2903     *depends_on = BITMAP_ALLOC (NULL);
2904   bitmap_set_bit (*depends_on, info->inv_id);
2905
2906   return NULL_TREE;
2907 }
2908
2909 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2910    position to POS.  If USE is not NULL, the candidate is set as related to
2911    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2912    replacement of the final value of the iv by a direct computation.  */
2913
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916                  tree base, tree step, bool important, enum iv_position pos,
2917                  struct iv_use *use, gimple *incremented_at,
2918                  struct iv *orig_iv = NULL)
2919 {
2920   unsigned i;
2921   struct iv_cand *cand = NULL;
2922   tree type, orig_type;
2923
2924   gcc_assert (base && step);
2925
2926   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927      live, but the ivopts code may replace a real pointer with one
2928      pointing before or after the memory block that is then adjusted
2929      into the memory block during the loop.  FIXME: It would likely be
2930      better to actually force the pointer live and still use ivopts;
2931      for example, it would be enough to write the pointer into memory
2932      and keep it there until after the loop.  */
2933   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934     return NULL;
2935
2936   /* For non-original variables, make sure their values are computed in a type
2937      that does not invoke undefined behavior on overflows (since in general,
2938      we cannot prove that these induction variables are non-wrapping).  */
2939   if (pos != IP_ORIGINAL)
2940     {
2941       orig_type = TREE_TYPE (base);
2942       type = generic_type_for (orig_type);
2943       if (type != orig_type)
2944         {
2945           base = fold_convert (type, base);
2946           step = fold_convert (type, step);
2947         }
2948     }
2949
2950   for (i = 0; i < data->vcands.length (); i++)
2951     {
2952       cand = data->vcands[i];
2953
2954       if (cand->pos != pos)
2955         continue;
2956
2957       if (cand->incremented_at != incremented_at
2958           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959               && cand->ainc_use != use))
2960         continue;
2961
2962       if (operand_equal_p (base, cand->iv->base, 0)
2963           && operand_equal_p (step, cand->iv->step, 0)
2964           && (TYPE_PRECISION (TREE_TYPE (base))
2965               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966         break;
2967     }
2968
2969   if (i == data->vcands.length ())
2970     {
2971       cand = XCNEW (struct iv_cand);
2972       cand->id = i;
2973       cand->iv = alloc_iv (data, base, step);
2974       cand->pos = pos;
2975       if (pos != IP_ORIGINAL)
2976         {
2977           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978           cand->var_after = cand->var_before;
2979         }
2980       cand->important = important;
2981       cand->incremented_at = incremented_at;
2982       data->vcands.safe_push (cand);
2983
2984       if (TREE_CODE (step) != INTEGER_CST)
2985         {
2986           fd_ivopts_data = data;
2987           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2988         }
2989
2990       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991         cand->ainc_use = use;
2992       else
2993         cand->ainc_use = NULL;
2994
2995       cand->orig_iv = orig_iv;
2996       if (dump_file && (dump_flags & TDF_DETAILS))
2997         dump_cand (dump_file, cand);
2998     }
2999
3000   cand->important |= important;
3001
3002   /* Relate candidate to the group for which it is added.  */
3003   if (use)
3004     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3005
3006   return cand;
3007 }
3008
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010    is allowed.
3011
3012    The purpose is to avoid splitting latch edge with a biv increment, thus
3013    creating a jump, possibly confusing other optimization passes and leaving
3014    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3015    is not available (so we do not have a better alternative), or if the latch
3016    edge is already nonempty.  */
3017
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3020 {
3021   if (!ip_normal_pos (loop))
3022     return true;
3023
3024   if (!empty_block_p (ip_end_pos (loop)))
3025     return true;
3026
3027   return false;
3028 }
3029
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031    Important field is set to IMPORTANT.  */
3032
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035                         bool important, struct iv_use *use)
3036 {
3037   basic_block use_bb = gimple_bb (use->stmt);
3038   machine_mode mem_mode;
3039   unsigned HOST_WIDE_INT cstepi;
3040
3041   /* If we insert the increment in any position other than the standard
3042      ones, we must ensure that it is incremented once per iteration.
3043      It must not be in an inner nested loop, or one side of an if
3044      statement.  */
3045   if (use_bb->loop_father != data->current_loop
3046       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047       || stmt_could_throw_p (use->stmt)
3048       || !cst_and_fits_in_hwi (step))
3049     return;
3050
3051   cstepi = int_cst_value (step);
3052
3053   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3054   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055         || USE_STORE_PRE_INCREMENT (mem_mode))
3056        && GET_MODE_SIZE (mem_mode) == cstepi)
3057       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058            || USE_STORE_PRE_DECREMENT (mem_mode))
3059           && GET_MODE_SIZE (mem_mode) == -cstepi))
3060     {
3061       enum tree_code code = MINUS_EXPR;
3062       tree new_base;
3063       tree new_step = step;
3064
3065       if (POINTER_TYPE_P (TREE_TYPE (base)))
3066         {
3067           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068           code = POINTER_PLUS_EXPR;
3069         }
3070       else
3071         new_step = fold_convert (TREE_TYPE (base), new_step);
3072       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074                        use->stmt);
3075     }
3076   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077         || USE_STORE_POST_INCREMENT (mem_mode))
3078        && GET_MODE_SIZE (mem_mode) == cstepi)
3079       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080            || USE_STORE_POST_DECREMENT (mem_mode))
3081           && GET_MODE_SIZE (mem_mode) == -cstepi))
3082     {
3083       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084                        use->stmt);
3085     }
3086 }
3087
3088 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3089    position to POS.  If USE is not NULL, the candidate is set as related to
3090    it.  The candidate computation is scheduled before exit condition and at
3091    the end of loop.  */
3092
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095                tree base, tree step, bool important, struct iv_use *use,
3096                struct iv *orig_iv = NULL)
3097 {
3098   if (ip_normal_pos (data->current_loop))
3099     add_candidate_1 (data, base, step, important,
3100                      IP_NORMAL, use, NULL, orig_iv);
3101   if (ip_end_pos (data->current_loop)
3102       && allow_ip_end_pos_p (data->current_loop))
3103     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3104 }
3105
3106 /* Adds standard iv candidates.  */
3107
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3110 {
3111   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3112
3113   /* The same for a double-integer type if it is still fast enough.  */
3114   if (TYPE_PRECISION
3115         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118                    build_int_cst (long_integer_type_node, 1), true, NULL);
3119
3120   /* The same for a double-integer type if it is still fast enough.  */
3121   if (TYPE_PRECISION
3122         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3126 }
3127
3128
3129 /* Adds candidates bases on the old induction variable IV.  */
3130
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3133 {
3134   gimple *phi;
3135   tree def;
3136   struct iv_cand *cand;
3137
3138   /* Check if this biv is used in address type use.  */
3139   if (iv->no_overflow  && iv->have_address_use
3140       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3142     {
3143       tree base = fold_convert (sizetype, iv->base);
3144       tree step = fold_convert (sizetype, iv->step);
3145
3146       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3147       add_candidate (data, base, step, true, NULL, iv);
3148       /* Add iv cand of the original type only if it has nonlinear use.  */
3149       if (iv->nonlin_use)
3150         add_candidate (data, iv->base, iv->step, true, NULL);
3151     }
3152   else
3153     add_candidate (data, iv->base, iv->step, true, NULL);
3154
3155   /* The same, but with initial value zero.  */
3156   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157     add_candidate (data, size_int (0), iv->step, true, NULL);
3158   else
3159     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160                    iv->step, true, NULL);
3161
3162   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163   if (gimple_code (phi) == GIMPLE_PHI)
3164     {
3165       /* Additionally record the possibility of leaving the original iv
3166          untouched.  */
3167       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168       /* Don't add candidate if it's from another PHI node because
3169          it's an affine iv appearing in the form of PEELED_CHREC.  */
3170       phi = SSA_NAME_DEF_STMT (def);
3171       if (gimple_code (phi) != GIMPLE_PHI)
3172         {
3173           cand = add_candidate_1 (data,
3174                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175                                   SSA_NAME_DEF_STMT (def));
3176           if (cand)
3177             {
3178               cand->var_before = iv->ssa_name;
3179               cand->var_after = def;
3180             }
3181         }
3182       else
3183         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3184     }
3185 }
3186
3187 /* Adds candidates based on the old induction variables.  */
3188
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3191 {
3192   unsigned i;
3193   struct iv *iv;
3194   bitmap_iterator bi;
3195
3196   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3197     {
3198       iv = ver_info (data, i)->iv;
3199       if (iv && iv->biv_p && !integer_zerop (iv->step))
3200         add_iv_candidate_for_biv (data, iv);
3201     }
3202 }
3203
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3205
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208                     tree step, struct iv_use *use)
3209 {
3210   struct iv_common_cand ent;
3211   struct iv_common_cand **slot;
3212
3213   ent.base = base;
3214   ent.step = step;
3215   ent.hash = iterative_hash_expr (base, 0);
3216   ent.hash = iterative_hash_expr (step, ent.hash);
3217
3218   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3219   if (*slot == NULL)
3220     {
3221       *slot = new iv_common_cand ();
3222       (*slot)->base = base;
3223       (*slot)->step = step;
3224       (*slot)->uses.create (8);
3225       (*slot)->hash = ent.hash;
3226       data->iv_common_cands.safe_push ((*slot));
3227     }
3228
3229   gcc_assert (use != NULL);
3230   (*slot)->uses.safe_push (use);
3231   return;
3232 }
3233
3234 /* Comparison function used to sort common candidates.  */
3235
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3238 {
3239   unsigned n1, n2;
3240   const struct iv_common_cand *const *const ccand1
3241     = (const struct iv_common_cand *const *)p1;
3242   const struct iv_common_cand *const *const ccand2
3243     = (const struct iv_common_cand *const *)p2;
3244
3245   n1 = (*ccand1)->uses.length ();
3246   n2 = (*ccand2)->uses.length ();
3247   return n2 - n1;
3248 }
3249
3250 /* Adds IV candidates based on common candidated recorded.  */
3251
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3254 {
3255   unsigned i, j;
3256   struct iv_cand *cand_1, *cand_2;
3257
3258   data->iv_common_cands.qsort (common_cand_cmp);
3259   for (i = 0; i < data->iv_common_cands.length (); i++)
3260     {
3261       struct iv_common_cand *ptr = data->iv_common_cands[i];
3262
3263       /* Only add IV candidate if it's derived from multiple uses.  */
3264       if (ptr->uses.length () <= 1)
3265         break;
3266
3267       cand_1 = NULL;
3268       cand_2 = NULL;
3269       if (ip_normal_pos (data->current_loop))
3270         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271                                   false, IP_NORMAL, NULL, NULL);
3272
3273       if (ip_end_pos (data->current_loop)
3274           && allow_ip_end_pos_p (data->current_loop))
3275         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276                                   false, IP_END, NULL, NULL);
3277
3278       /* Bind deriving uses and the new candidates.  */
3279       for (j = 0; j < ptr->uses.length (); j++)
3280         {
3281           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282           if (cand_1)
3283             bitmap_set_bit (group->related_cands, cand_1->id);
3284           if (cand_2)
3285             bitmap_set_bit (group->related_cands, cand_2->id);
3286         }
3287     }
3288
3289   /* Release data since it is useless from this point.  */
3290   data->iv_common_cand_tab->empty ();
3291   data->iv_common_cands.truncate (0);
3292 }
3293
3294 /* Adds candidates based on the value of USE's iv.  */
3295
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3298 {
3299   unsigned HOST_WIDE_INT offset;
3300   tree base;
3301   tree basetype;
3302   struct iv *iv = use->iv;
3303
3304   add_candidate (data, iv->base, iv->step, false, use);
3305
3306   /* Record common candidate for use in case it can be shared by others.  */
3307   record_common_cand (data, iv->base, iv->step, use);
3308
3309   /* Record common candidate with initial value zero.  */
3310   basetype = TREE_TYPE (iv->base);
3311   if (POINTER_TYPE_P (basetype))
3312     basetype = sizetype;
3313   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3314
3315   /* Record common candidate with constant offset stripped in base.
3316      Like the use itself, we also add candidate directly for it.  */
3317   base = strip_offset (iv->base, &offset);
3318   if (offset || base != iv->base)
3319     {
3320       record_common_cand (data, base, iv->step, use);
3321       add_candidate (data, base, iv->step, false, use);
3322     }
3323
3324   /* Record common candidate with base_object removed in base.  */
3325   if (iv->base_object != NULL)
3326     {
3327       unsigned i;
3328       aff_tree aff_base;
3329       tree step, base_object = iv->base_object;
3330
3331       base = iv->base;
3332       step = iv->step;
3333       STRIP_NOPS (base);
3334       STRIP_NOPS (step);
3335       STRIP_NOPS (base_object);
3336       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3337       for (i = 0; i < aff_base.n; i++)
3338         {
3339           if (aff_base.elts[i].coef != 1)
3340             continue;
3341
3342           if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343             break;
3344         }
3345       if (i < aff_base.n)
3346         {
3347           aff_combination_remove_elt (&aff_base, i);
3348           base = aff_combination_to_tree (&aff_base);
3349           basetype = TREE_TYPE (base);
3350           if (POINTER_TYPE_P (basetype))
3351             basetype = sizetype;
3352
3353           step = fold_convert (basetype, step);
3354           record_common_cand (data, base, step, use);
3355           /* Also record common candidate with offset stripped.  */
3356           base = strip_offset (base, &offset);
3357           if (offset)
3358             record_common_cand (data, base, step, use);
3359         }
3360     }
3361
3362   /* At last, add auto-incremental candidates.  Make such variables
3363      important since other iv uses with same base object may be based
3364      on it.  */
3365   if (use != NULL && use->type == USE_ADDRESS)
3366     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3367 }
3368
3369 /* Adds candidates based on the uses.  */
3370
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3373 {
3374   unsigned i;
3375
3376   /* Only add candidate for the first use in group.  */
3377   for (i = 0; i < data->vgroups.length (); i++)
3378     {
3379       struct iv_group *group = data->vgroups[i];
3380
3381       gcc_assert (group->vuses[0] != NULL);
3382       add_iv_candidate_for_use (data, group->vuses[0]);
3383     }
3384   add_iv_candidate_derived_from_uses (data);
3385 }
3386
3387 /* Record important candidates and add them to related_cands bitmaps.  */
3388
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3391 {
3392   unsigned i;
3393   struct iv_group *group;
3394
3395   for (i = 0; i < data->vcands.length (); i++)
3396     {
3397       struct iv_cand *cand = data->vcands[i];
3398
3399       if (cand->important)
3400         bitmap_set_bit (data->important_candidates, i);
3401     }
3402
3403   data->consider_all_candidates = (data->vcands.length ()
3404                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3405
3406   /* Add important candidates to groups' related_cands bitmaps.  */
3407   for (i = 0; i < data->vgroups.length (); i++)
3408     {
3409       group = data->vgroups[i];
3410       bitmap_ior_into (group->related_cands, data->important_candidates);
3411     }
3412 }
3413
3414 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3415    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3416    we allocate a simple list to every use.  */
3417
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3420 {
3421   unsigned i, size, s;
3422
3423   for (i = 0; i < data->vgroups.length (); i++)
3424     {
3425       struct iv_group *group = data->vgroups[i];
3426
3427       if (data->consider_all_candidates)
3428         size = data->vcands.length ();
3429       else
3430         {
3431           s = bitmap_count_bits (group->related_cands);
3432
3433           /* Round up to the power of two, so that moduling by it is fast.  */
3434           size = s ? (1 << ceil_log2 (s)) : 1;
3435         }
3436
3437       group->n_map_members = size;
3438       group->cost_map = XCNEWVEC (struct cost_pair, size);
3439     }
3440 }
3441
3442 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3443    on invariants DEPENDS_ON and that the value used in expressing it
3444    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3445
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448                    struct iv_group *group, struct iv_cand *cand,
3449                    comp_cost cost, bitmap depends_on, tree value,
3450                    enum tree_code comp, iv_inv_expr_ent *inv_expr)
3451 {
3452   unsigned i, s;
3453
3454   if (cost.infinite_cost_p ())
3455     {
3456       BITMAP_FREE (depends_on);
3457       return;
3458     }
3459
3460   if (data->consider_all_candidates)
3461     {
3462       group->cost_map[cand->id].cand = cand;
3463       group->cost_map[cand->id].cost = cost;
3464       group->cost_map[cand->id].depends_on = depends_on;
3465       group->cost_map[cand->id].value = value;
3466       group->cost_map[cand->id].comp = comp;
3467       group->cost_map[cand->id].inv_expr = inv_expr;
3468       return;
3469     }
3470
3471   /* n_map_members is a power of two, so this computes modulo.  */
3472   s = cand->id & (group->n_map_members - 1);
3473   for (i = s; i < group->n_map_members; i++)
3474     if (!group->cost_map[i].cand)
3475       goto found;
3476   for (i = 0; i < s; i++)
3477     if (!group->cost_map[i].cand)
3478       goto found;
3479
3480   gcc_unreachable ();
3481
3482 found:
3483   group->cost_map[i].cand = cand;
3484   group->cost_map[i].cost = cost;
3485   group->cost_map[i].depends_on = depends_on;
3486   group->cost_map[i].value = value;
3487   group->cost_map[i].comp = comp;
3488   group->cost_map[i].inv_expr = inv_expr;
3489 }
3490
3491 /* Gets cost of (GROUP, CAND) pair.  */
3492
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495                    struct iv_cand *cand)
3496 {
3497   unsigned i, s;
3498   struct cost_pair *ret;
3499
3500   if (!cand)
3501     return NULL;
3502
3503   if (data->consider_all_candidates)
3504     {
3505       ret = group->cost_map + cand->id;
3506       if (!ret->cand)
3507         return NULL;
3508
3509       return ret;
3510     }
3511
3512   /* n_map_members is a power of two, so this computes modulo.  */
3513   s = cand->id & (group->n_map_members - 1);
3514   for (i = s; i < group->n_map_members; i++)
3515     if (group->cost_map[i].cand == cand)
3516       return group->cost_map + i;
3517     else if (group->cost_map[i].cand == NULL)
3518       return NULL;
3519   for (i = 0; i < s; i++)
3520     if (group->cost_map[i].cand == cand)
3521       return group->cost_map + i;
3522     else if (group->cost_map[i].cand == NULL)
3523       return NULL;
3524
3525   return NULL;
3526 }
3527
3528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3531 {
3532   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533   machine_mode address_mode = targetm.addr_space.address_mode (as);
3534   rtx x;
3535
3536   gcc_assert (obj);
3537   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3538     {
3539       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540       x = gen_rtx_SYMBOL_REF (address_mode, name);
3541       SET_SYMBOL_REF_DECL (x, obj);
3542       x = gen_rtx_MEM (DECL_MODE (obj), x);
3543       set_mem_addr_space (x, as);
3544       targetm.encode_section_info (obj, x, true);
3545     }
3546   else
3547     {
3548       x = gen_raw_REG (address_mode, (*regno)++);
3549       x = gen_rtx_MEM (DECL_MODE (obj), x);
3550       set_mem_addr_space (x, as);
3551     }
3552
3553   return x;
3554 }
3555
3556 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3557    walk_tree.  DATA contains the actual fake register number.  */
3558
3559 static tree
3560 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3561 {
3562   tree obj = NULL_TREE;
3563   rtx x = NULL_RTX;
3564   int *regno = (int *) data;
3565
3566   switch (TREE_CODE (*expr_p))
3567     {
3568     case ADDR_EXPR:
3569       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3570            handled_component_p (*expr_p);
3571            expr_p = &TREE_OPERAND (*expr_p, 0))
3572         continue;
3573       obj = *expr_p;
3574       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3575         x = produce_memory_decl_rtl (obj, regno);
3576       break;
3577
3578     case SSA_NAME:
3579       *ws = 0;
3580       obj = SSA_NAME_VAR (*expr_p);
3581       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3582       if (!obj)
3583         return NULL_TREE;
3584       if (!DECL_RTL_SET_P (obj))
3585         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3586       break;
3587
3588     case VAR_DECL:
3589     case PARM_DECL:
3590     case RESULT_DECL:
3591       *ws = 0;
3592       obj = *expr_p;
3593
3594       if (DECL_RTL_SET_P (obj))
3595         break;
3596
3597       if (DECL_MODE (obj) == BLKmode)
3598         x = produce_memory_decl_rtl (obj, regno);
3599       else
3600         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3601
3602       break;
3603
3604     default:
3605       break;
3606     }
3607
3608   if (x)
3609     {
3610       decl_rtl_to_reset.safe_push (obj);
3611       SET_DECL_RTL (obj, x);
3612     }
3613
3614   return NULL_TREE;
3615 }
3616
3617 /* Determines cost of the computation of EXPR.  */
3618
3619 static unsigned
3620 computation_cost (tree expr, bool speed)
3621 {
3622   rtx_insn *seq;
3623   rtx rslt;
3624   tree type = TREE_TYPE (expr);
3625   unsigned cost;
3626   /* Avoid using hard regs in ways which may be unsupported.  */
3627   int regno = LAST_VIRTUAL_REGISTER + 1;
3628   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3629   enum node_frequency real_frequency = node->frequency;
3630
3631   node->frequency = NODE_FREQUENCY_NORMAL;
3632   crtl->maybe_hot_insn_p = speed;
3633   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3634   start_sequence ();
3635   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3636   seq = get_insns ();
3637   end_sequence ();
3638   default_rtl_profile ();
3639   node->frequency = real_frequency;
3640
3641   cost = seq_cost (seq, speed);
3642   if (MEM_P (rslt))
3643     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3644                           TYPE_ADDR_SPACE (type), speed);
3645   else if (!REG_P (rslt))
3646     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3647
3648   return cost;
3649 }
3650
3651 /* Returns variable containing the value of candidate CAND at statement AT.  */
3652
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3655 {
3656   if (stmt_after_increment (loop, cand, stmt))
3657     return cand->var_after;
3658   else
3659     return cand->var_before;
3660 }
3661
3662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3663    same precision that is at least as wide as the precision of TYPE, stores
3664    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3665    type of A and B.  */
3666
3667 static tree
3668 determine_common_wider_type (tree *a, tree *b)
3669 {
3670   tree wider_type = NULL;
3671   tree suba, subb;
3672   tree atype = TREE_TYPE (*a);
3673
3674   if (CONVERT_EXPR_P (*a))
3675     {
3676       suba = TREE_OPERAND (*a, 0);
3677       wider_type = TREE_TYPE (suba);
3678       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3679         return atype;
3680     }
3681   else
3682     return atype;
3683
3684   if (CONVERT_EXPR_P (*b))
3685     {
3686       subb = TREE_OPERAND (*b, 0);
3687       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3688         return atype;
3689     }
3690   else
3691     return atype;
3692
3693   *a = suba;
3694   *b = subb;
3695   return wider_type;
3696 }
3697
3698 /* Determines the expression by that USE is expressed from induction variable
3699    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3700    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3701
3702 static bool
3703 get_computation_aff (struct loop *loop,
3704                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3705                      struct aff_tree *aff)
3706 {
3707   tree ubase = use->iv->base;
3708   tree ustep = use->iv->step;
3709   tree cbase = cand->iv->base;
3710   tree cstep = cand->iv->step, cstep_common;
3711   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3712   tree common_type, var;
3713   tree uutype;
3714   aff_tree cbase_aff, var_aff;
3715   widest_int rat;
3716
3717   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3718     {
3719       /* We do not have a precision to express the values of use.  */
3720       return false;
3721     }
3722
3723   var = var_at_stmt (loop, cand, at);
3724   uutype = unsigned_type_for (utype);
3725
3726   /* If the conversion is not noop, perform it.  */
3727   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3728     {
3729       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3730           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3731         {
3732           tree inner_base, inner_step, inner_type;
3733           inner_base = TREE_OPERAND (cbase, 0);
3734           if (CONVERT_EXPR_P (cstep))
3735             inner_step = TREE_OPERAND (cstep, 0);
3736           else
3737             inner_step = cstep;
3738
3739           inner_type = TREE_TYPE (inner_base);
3740           /* If candidate is added from a biv whose type is smaller than
3741              ctype, we know both candidate and the biv won't overflow.
3742              In this case, it's safe to skip the convertion in candidate.
3743              As an example, (unsigned short)((unsigned long)A) equals to
3744              (unsigned short)A, if A has a type no larger than short.  */
3745           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3746             {
3747               cbase = inner_base;
3748               cstep = inner_step;
3749             }
3750         }
3751       cstep = fold_convert (uutype, cstep);
3752       cbase = fold_convert (uutype, cbase);
3753       var = fold_convert (uutype, var);
3754     }
3755
3756   /* Ratio is 1 when computing the value of biv cand by itself.
3757      We can't rely on constant_multiple_of in this case because the
3758      use is created after the original biv is selected.  The call
3759      could fail because of inconsistent fold behavior.  See PR68021
3760      for more information.  */
3761   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3762     {
3763       gcc_assert (is_gimple_assign (use->stmt));
3764       gcc_assert (use->iv->ssa_name == cand->var_after);
3765       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3766       rat = 1;
3767     }
3768   else if (!constant_multiple_of (ustep, cstep, &rat))
3769     return false;
3770
3771   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3772      type, we achieve better folding by computing their difference in this
3773      wider type, and cast the result to UUTYPE.  We do not need to worry about
3774      overflows, as all the arithmetics will in the end be performed in UUTYPE
3775      anyway.  */
3776   common_type = determine_common_wider_type (&ubase, &cbase);
3777
3778   /* use = ubase - ratio * cbase + ratio * var.  */
3779   tree_to_aff_combination (ubase, common_type, aff);
3780   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3781   tree_to_aff_combination (var, uutype, &var_aff);
3782
3783   /* We need to shift the value if we are after the increment.  */
3784   if (stmt_after_increment (loop, cand, at))
3785     {
3786       aff_tree cstep_aff;
3787
3788       if (common_type != uutype)
3789         cstep_common = fold_convert (common_type, cstep);
3790       else
3791         cstep_common = cstep;
3792
3793       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3794       aff_combination_add (&cbase_aff, &cstep_aff);
3795     }
3796
3797   aff_combination_scale (&cbase_aff, -rat);
3798   aff_combination_add (aff, &cbase_aff);
3799   if (common_type != uutype)
3800     aff_combination_convert (aff, uutype);
3801
3802   aff_combination_scale (&var_aff, rat);
3803   aff_combination_add (aff, &var_aff);
3804
3805   return true;
3806 }
3807
3808 /* Return the type of USE.  */
3809
3810 static tree
3811 get_use_type (struct iv_use *use)
3812 {
3813   tree base_type = TREE_TYPE (use->iv->base);
3814   tree type;
3815
3816   if (use->type == USE_ADDRESS)
3817     {
3818       /* The base_type may be a void pointer.  Create a pointer type based on
3819          the mem_ref instead.  */
3820       type = build_pointer_type (TREE_TYPE (*use->op_p));
3821       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3822                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3823     }
3824   else
3825     type = base_type;
3826
3827   return type;
3828 }
3829
3830 /* Determines the expression by that USE is expressed from induction variable
3831    CAND at statement AT in LOOP.  The computation is unshared.  */
3832
3833 static tree
3834 get_computation_at (struct loop *loop,
3835                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3836 {
3837   aff_tree aff;
3838   tree type = get_use_type (use);
3839
3840   if (!get_computation_aff (loop, use, cand, at, &aff))
3841     return NULL_TREE;
3842   unshare_aff_combination (&aff);
3843   return fold_convert (type, aff_combination_to_tree (&aff));
3844 }
3845
3846 /* Determines the expression by that USE is expressed from induction variable
3847    CAND in LOOP.  The computation is unshared.  */
3848
3849 static tree
3850 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3851 {
3852   return get_computation_at (loop, use, cand, use->stmt);
3853 }
3854
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856    If we're optimizing for space, the loop setup overhead is constant;
3857    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3860 {
3861   if (cost == INFTY)
3862     return cost;
3863   else if (optimize_loop_for_speed_p (data->current_loop))
3864     return cost / avg_loop_niter (data->current_loop);
3865   else
3866     return cost;
3867 }
3868
3869 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3870    validity for a memory reference accessing memory of mode MODE in
3871    address space AS.  */
3872
3873
3874 bool
3875 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3876                                  addr_space_t as)
3877 {
3878 #define MAX_RATIO 128
3879   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3880   static vec<sbitmap> valid_mult_list;
3881   sbitmap valid_mult;
3882
3883   if (data_index >= valid_mult_list.length ())
3884     valid_mult_list.safe_grow_cleared (data_index + 1);
3885
3886   valid_mult = valid_mult_list[data_index];
3887   if (!valid_mult)
3888     {
3889       machine_mode address_mode = targetm.addr_space.address_mode (as);
3890       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3891       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3892       rtx addr, scaled;
3893       HOST_WIDE_INT i;
3894
3895       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3896       bitmap_clear (valid_mult);
3897       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3898       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3899       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3900         {
3901           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3902           if (memory_address_addr_space_p (mode, addr, as)
3903               || memory_address_addr_space_p (mode, scaled, as))
3904             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3905         }
3906
3907       if (dump_file && (dump_flags & TDF_DETAILS))
3908         {
3909           fprintf (dump_file, "  allowed multipliers:");
3910           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3911             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3912               fprintf (dump_file, " %d", (int) i);
3913           fprintf (dump_file, "\n");
3914           fprintf (dump_file, "\n");
3915         }
3916
3917       valid_mult_list[data_index] = valid_mult;
3918     }
3919
3920   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3921     return false;
3922
3923   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3924 }
3925
3926 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3927    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3928    variable is omitted.  Compute the cost for a memory reference that accesses
3929    a memory location of mode MEM_MODE in address space AS.
3930
3931    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3932    size of MEM_MODE / RATIO) is available.  To make this determination, we
3933    look at the size of the increment to be made, which is given in CSTEP.
3934    CSTEP may be zero if the step is unknown.
3935    STMT_AFTER_INC is true iff the statement we're looking at is after the
3936    increment of the original biv.
3937
3938    TODO -- there must be some better way.  This all is quite crude.  */
3939
3940 enum ainc_type
3941 {
3942   AINC_PRE_INC,         /* Pre increment.  */
3943   AINC_PRE_DEC,         /* Pre decrement.  */
3944   AINC_POST_INC,        /* Post increment.  */
3945   AINC_POST_DEC,        /* Post decrement.  */
3946   AINC_NONE             /* Also the number of auto increment types.  */
3947 };
3948
3949 struct address_cost_data
3950 {
3951   HOST_WIDE_INT min_offset, max_offset;
3952   unsigned costs[2][2][2][2];
3953   unsigned ainc_costs[AINC_NONE];
3954 };
3955
3956
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3961                   addr_space_t as, bool speed,
3962                   bool stmt_after_inc, bool *may_autoinc)
3963 {
3964   machine_mode address_mode = targetm.addr_space.address_mode (as);
3965   static vec<address_cost_data *> address_cost_data_list;
3966   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967   address_cost_data *data;
3968   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970   unsigned cost, acost, complexity;
3971   enum ainc_type autoinc_type;
3972   bool offset_p, ratio_p, autoinc;
3973   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974   unsigned HOST_WIDE_INT mask;
3975   unsigned bits;
3976
3977   if (data_index >= address_cost_data_list.length ())
3978     address_cost_data_list.safe_grow_cleared (data_index + 1);
3979
3980   data = address_cost_data_list[data_index];
3981   if (!data)
3982     {
3983       HOST_WIDE_INT i;
3984       HOST_WIDE_INT rat, off = 0;
3985       int old_cse_not_expected, width;
3986       unsigned sym_p, var_p, off_p, rat_p, add_c;
3987       rtx_insn *seq;
3988       rtx addr, base;
3989       rtx reg0, reg1;
3990
3991       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3992
3993       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3994
3995       width = GET_MODE_BITSIZE (address_mode) - 1;
3996       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997         width = HOST_BITS_PER_WIDE_INT - 1;
3998       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3999
4000       for (i = width; i >= 0; i--)
4001         {
4002           off = -((unsigned HOST_WIDE_INT) 1 << i);
4003           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004           if (memory_address_addr_space_p (mem_mode, addr, as))
4005             break;
4006         }
4007       data->min_offset = (i == -1? 0 : off);
4008
4009       for (i = width; i >= 0; i--)
4010         {
4011           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013           if (memory_address_addr_space_p (mem_mode, addr, as))
4014             break;
4015           /* For some strict-alignment targets, the offset must be naturally
4016              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4017           off = mem_mode != QImode
4018                 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019                     - GET_MODE_SIZE (mem_mode)
4020                 : 0;
4021           if (off > 0)
4022             {
4023               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024               if (memory_address_addr_space_p (mem_mode, addr, as))
4025                 break;
4026             }
4027         }
4028       if (i == -1)
4029         off = 0;
4030       data->max_offset = off;
4031
4032       if (dump_file && (dump_flags & TDF_DETAILS))
4033         {
4034           fprintf (dump_file, "get_address_cost:\n");
4035           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036                    GET_MODE_NAME (mem_mode),
4037                    data->min_offset);
4038           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039                    GET_MODE_NAME (mem_mode),
4040                    data->max_offset);
4041         }
4042
4043       rat = 1;
4044       for (i = 2; i <= MAX_RATIO; i++)
4045         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4046           {
4047             rat = i;
4048             break;
4049           }
4050
4051       /* Compute the cost of various addressing modes.  */
4052       acost = 0;
4053       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4055
4056       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057           || USE_STORE_PRE_DECREMENT (mem_mode))
4058         {
4059           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060           has_predec[mem_mode]
4061             = memory_address_addr_space_p (mem_mode, addr, as);
4062
4063           if (has_predec[mem_mode])
4064             data->ainc_costs[AINC_PRE_DEC]
4065               = address_cost (addr, mem_mode, as, speed);
4066         }
4067       if (USE_LOAD_POST_DECREMENT (mem_mode)
4068           || USE_STORE_POST_DECREMENT (mem_mode))
4069         {
4070           addr = gen_rtx_POST_DEC (address_mode, reg0);
4071           has_postdec[mem_mode]
4072             = memory_address_addr_space_p (mem_mode, addr, as);
4073
4074           if (has_postdec[mem_mode])
4075             data->ainc_costs[AINC_POST_DEC]
4076               = address_cost (addr, mem_mode, as, speed);
4077         }
4078       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079           || USE_STORE_PRE_DECREMENT (mem_mode))
4080         {
4081           addr = gen_rtx_PRE_INC (address_mode, reg0);
4082           has_preinc[mem_mode]
4083             = memory_address_addr_space_p (mem_mode, addr, as);
4084
4085           if (has_preinc[mem_mode])
4086             data->ainc_costs[AINC_PRE_INC]
4087               = address_cost (addr, mem_mode, as, speed);
4088         }
4089       if (USE_LOAD_POST_INCREMENT (mem_mode)
4090           || USE_STORE_POST_INCREMENT (mem_mode))
4091         {
4092           addr = gen_rtx_POST_INC (address_mode, reg0);
4093           has_postinc[mem_mode]
4094             = memory_address_addr_space_p (mem_mode, addr, as);
4095
4096           if (has_postinc[mem_mode])
4097             data->ainc_costs[AINC_POST_INC]
4098               = address_cost (addr, mem_mode, as, speed);
4099         }
4100       for (i = 0; i < 16; i++)
4101         {
4102           sym_p = i & 1;
4103           var_p = (i >> 1) & 1;
4104           off_p = (i >> 2) & 1;
4105           rat_p = (i >> 3) & 1;
4106
4107           addr = reg0;
4108           if (rat_p)
4109             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110                                    gen_int_mode (rat, address_mode));
4111
4112           if (var_p)
4113             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4114
4115           if (sym_p)
4116             {
4117               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118               /* ??? We can run into trouble with some backends by presenting
4119                  it with symbols which haven't been properly passed through
4120                  targetm.encode_section_info.  By setting the local bit, we
4121                  enhance the probability of things working.  */
4122               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4123
4124               if (off_p)
4125                 base = gen_rtx_fmt_e (CONST, address_mode,
4126                                       gen_rtx_fmt_ee
4127                                         (PLUS, address_mode, base,
4128                                          gen_int_mode (off, address_mode)));
4129             }
4130           else if (off_p)
4131             base = gen_int_mode (off, address_mode);
4132           else
4133             base = NULL_RTX;
4134
4135           if (base)
4136             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4137
4138           start_sequence ();
4139           /* To avoid splitting addressing modes, pretend that no cse will
4140              follow.  */
4141           old_cse_not_expected = cse_not_expected;
4142           cse_not_expected = true;
4143           addr = memory_address_addr_space (mem_mode, addr, as);
4144           cse_not_expected = old_cse_not_expected;
4145           seq = get_insns ();
4146           end_sequence ();
4147
4148           acost = seq_cost (seq, speed);
4149           acost += address_cost (addr, mem_mode, as, speed);
4150
4151           if (!acost)
4152             acost = 1;
4153           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4154         }
4155
4156       /* On some targets, it is quite expensive to load symbol to a register,
4157          which makes addresses that contain symbols look much more expensive.
4158          However, the symbol will have to be loaded in any case before the
4159          loop (and quite likely we have it in register already), so it does not
4160          make much sense to penalize them too heavily.  So make some final
4161          tweaks for the SYMBOL_PRESENT modes:
4162
4163          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164          var is cheaper, use this mode with small penalty.
4165          If VAR_PRESENT is true, try whether the mode with
4166          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4167          if this is the case, use it.  */
4168       add_c = add_cost (speed, address_mode);
4169       for (i = 0; i < 8; i++)
4170         {
4171           var_p = i & 1;
4172           off_p = (i >> 1) & 1;
4173           rat_p = (i >> 2) & 1;
4174
4175           acost = data->costs[0][1][off_p][rat_p] + 1;
4176           if (var_p)
4177             acost += add_c;
4178
4179           if (acost < data->costs[1][var_p][off_p][rat_p])
4180             data->costs[1][var_p][off_p][rat_p] = acost;
4181         }
4182
4183       if (dump_file && (dump_flags & TDF_DETAILS))
4184         {
4185           fprintf (dump_file, "<Address Costs>:\n");
4186
4187           for (i = 0; i < 16; i++)
4188             {
4189               sym_p = i & 1;
4190               var_p = (i >> 1) & 1;
4191               off_p = (i >> 2) & 1;
4192               rat_p = (i >> 3) & 1;
4193
4194               fprintf (dump_file, "  ");
4195               if (sym_p)
4196                 fprintf (dump_file, "sym + ");
4197               if (var_p)
4198                 fprintf (dump_file, "var + ");
4199               if (off_p)
4200                 fprintf (dump_file, "cst + ");
4201               if (rat_p)
4202                 fprintf (dump_file, "rat * ");
4203
4204               acost = data->costs[sym_p][var_p][off_p][rat_p];
4205               fprintf (dump_file, "index costs %d\n", acost);
4206             }
4207           if (has_predec[mem_mode] || has_postdec[mem_mode]
4208               || has_preinc[mem_mode] || has_postinc[mem_mode])
4209             fprintf (dump_file, "  May include autoinc/dec\n");
4210           fprintf (dump_file, "\n");
4211         }
4212
4213       address_cost_data_list[data_index] = data;
4214     }
4215
4216   bits = GET_MODE_BITSIZE (address_mode);
4217   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218   offset &= mask;
4219   if ((offset >> (bits - 1) & 1))
4220     offset |= ~mask;
4221   s_offset = offset;
4222
4223   autoinc = false;
4224   autoinc_type = AINC_NONE;
4225   msize = GET_MODE_SIZE (mem_mode);
4226   autoinc_offset = offset;
4227   if (stmt_after_inc)
4228     autoinc_offset += ratio * cstep;
4229   if (symbol_present || var_present || ratio != 1)
4230     autoinc = false;
4231   else
4232     {
4233       if (has_postinc[mem_mode] && autoinc_offset == 0
4234           && msize == cstep)
4235         autoinc_type = AINC_POST_INC;
4236       else if (has_postdec[mem_mode] && autoinc_offset == 0
4237                && msize == -cstep)
4238         autoinc_type = AINC_POST_DEC;
4239       else if (has_preinc[mem_mode] && autoinc_offset == msize
4240                && msize == cstep)
4241         autoinc_type = AINC_PRE_INC;
4242       else if (has_predec[mem_mode] && autoinc_offset == -msize
4243                && msize == -cstep)
4244         autoinc_type = AINC_PRE_DEC;
4245
4246       if (autoinc_type != AINC_NONE)
4247         autoinc = true;
4248     }
4249
4250   cost = 0;
4251   offset_p = (s_offset != 0
4252               && data->min_offset <= s_offset
4253               && s_offset <= data->max_offset);
4254   ratio_p = (ratio != 1
4255              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4256
4257   if (ratio != 1 && !ratio_p)
4258     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4259
4260   if (s_offset && !offset_p && !symbol_present)
4261     cost += add_cost (speed, address_mode);
4262
4263   if (may_autoinc)
4264     *may_autoinc = autoinc;
4265   if (autoinc)
4266     acost = data->ainc_costs[autoinc_type];
4267   else
4268     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270   return comp_cost (cost + acost, complexity);
4271 }
4272
4273  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4274     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4275     calculating the operands of EXPR.  Returns true if successful, and returns
4276     the cost in COST.  */
4277
4278 static bool
4279 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4280                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4281 {
4282   comp_cost res;
4283   tree op1 = TREE_OPERAND (expr, 1);
4284   tree cst = TREE_OPERAND (mult, 1);
4285   tree multop = TREE_OPERAND (mult, 0);
4286   int m = exact_log2 (int_cst_value (cst));
4287   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4288   int as_cost, sa_cost;
4289   bool mult_in_op1;
4290
4291   if (!(m >= 0 && m < maxm))
4292     return false;
4293
4294   STRIP_NOPS (op1);
4295   mult_in_op1 = operand_equal_p (op1, mult, 0);
4296
4297   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4298
4299   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4300      use that in preference to a shift insn followed by an add insn.  */
4301   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4302              ? shiftadd_cost (speed, mode, m)
4303              : (mult_in_op1
4304                 ? shiftsub1_cost (speed, mode, m)
4305                 : shiftsub0_cost (speed, mode, m)));
4306
4307   res = comp_cost (MIN (as_cost, sa_cost), 0);
4308   res += (mult_in_op1 ? cost0 : cost1);
4309
4310   STRIP_NOPS (multop);
4311   if (!is_gimple_val (multop))
4312     res += force_expr_to_var_cost (multop, speed);
4313
4314   *cost = res;
4315   return true;
4316 }
4317
4318 /* Estimates cost of forcing expression EXPR into a variable.  */
4319
4320 static comp_cost
4321 force_expr_to_var_cost (tree expr, bool speed)
4322 {
4323   static bool costs_initialized = false;
4324   static unsigned integer_cost [2];
4325   static unsigned symbol_cost [2];
4326   static unsigned address_cost [2];
4327   tree op0, op1;
4328   comp_cost cost0, cost1, cost;
4329   machine_mode mode;
4330
4331   if (!costs_initialized)
4332     {
4333       tree type = build_pointer_type (integer_type_node);
4334       tree var, addr;
4335       rtx x;
4336       int i;
4337
4338       var = create_tmp_var_raw (integer_type_node, "test_var");
4339       TREE_STATIC (var) = 1;
4340       x = produce_memory_decl_rtl (var, NULL);
4341       SET_DECL_RTL (var, x);
4342
4343       addr = build1 (ADDR_EXPR, type, var);
4344
4345
4346       for (i = 0; i < 2; i++)
4347         {
4348           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4349                                                              2000), i);
4350
4351           symbol_cost[i] = computation_cost (addr, i) + 1;
4352
4353           address_cost[i]
4354             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4355           if (dump_file && (dump_flags & TDF_DETAILS))
4356             {
4357               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4358               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4359               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4360               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4361               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4362               fprintf (dump_file, "\n");
4363             }
4364         }
4365
4366       costs_initialized = true;
4367     }
4368
4369   STRIP_NOPS (expr);
4370
4371   if (SSA_VAR_P (expr))
4372     return no_cost;
4373
4374   if (is_gimple_min_invariant (expr))
4375     {
4376       if (TREE_CODE (expr) == INTEGER_CST)
4377         return comp_cost (integer_cost [speed], 0);
4378
4379       if (TREE_CODE (expr) == ADDR_EXPR)
4380         {
4381           tree obj = TREE_OPERAND (expr, 0);
4382
4383           if (TREE_CODE (obj) == VAR_DECL
4384               || TREE_CODE (obj) == PARM_DECL
4385               || TREE_CODE (obj) == RESULT_DECL)
4386             return comp_cost (symbol_cost [speed], 0);
4387         }
4388
4389       return comp_cost (address_cost [speed], 0);
4390     }
4391
4392   switch (TREE_CODE (expr))
4393     {
4394     case POINTER_PLUS_EXPR:
4395     case PLUS_EXPR:
4396     case MINUS_EXPR:
4397     case MULT_EXPR:
4398       op0 = TREE_OPERAND (expr, 0);
4399       op1 = TREE_OPERAND (expr, 1);
4400       STRIP_NOPS (op0);
4401       STRIP_NOPS (op1);
4402       break;
4403
4404     CASE_CONVERT:
4405     case NEGATE_EXPR:
4406       op0 = TREE_OPERAND (expr, 0);
4407       STRIP_NOPS (op0);
4408       op1 = NULL_TREE;
4409       break;
4410
4411     default:
4412       /* Just an arbitrary value, FIXME.  */
4413       return comp_cost (target_spill_cost[speed], 0);
4414     }
4415
4416   if (op0 == NULL_TREE
4417       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4418     cost0 = no_cost;
4419   else
4420     cost0 = force_expr_to_var_cost (op0, speed);
4421
4422   if (op1 == NULL_TREE
4423       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4424     cost1 = no_cost;
4425   else
4426     cost1 = force_expr_to_var_cost (op1, speed);
4427
4428   mode = TYPE_MODE (TREE_TYPE (expr));
4429   switch (TREE_CODE (expr))
4430     {
4431     case POINTER_PLUS_EXPR:
4432     case PLUS_EXPR:
4433     case MINUS_EXPR:
4434     case NEGATE_EXPR:
4435       cost = comp_cost (add_cost (speed, mode), 0);
4436       if (TREE_CODE (expr) != NEGATE_EXPR)
4437         {
4438           tree mult = NULL_TREE;
4439           comp_cost sa_cost;
4440           if (TREE_CODE (op1) == MULT_EXPR)
4441             mult = op1;
4442           else if (TREE_CODE (op0) == MULT_EXPR)
4443             mult = op0;
4444
4445           if (mult != NULL_TREE
4446               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4447               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4448                                     speed, &sa_cost))
4449             return sa_cost;
4450         }
4451       break;
4452
4453     CASE_CONVERT:
4454       {
4455         tree inner_mode, outer_mode;
4456         outer_mode = TREE_TYPE (expr);
4457         inner_mode = TREE_TYPE (op0);
4458         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4459                                        TYPE_MODE (inner_mode), speed), 0);
4460       }
4461       break;
4462
4463     case MULT_EXPR:
4464       if (cst_and_fits_in_hwi (op0))
4465         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4466                                              mode, speed), 0);
4467       else if (cst_and_fits_in_hwi (op1))
4468         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4469                                              mode, speed), 0);
4470       else
4471         return comp_cost (target_spill_cost [speed], 0);
4472       break;
4473
4474     default:
4475       gcc_unreachable ();
4476     }
4477
4478   cost += cost0;
4479   cost += cost1;
4480
4481   /* Bound the cost by target_spill_cost.  The parts of complicated
4482      computations often are either loop invariant or at least can
4483      be shared between several iv uses, so letting this grow without
4484      limits would not give reasonable results.  */
4485   if (cost.cost > (int) target_spill_cost [speed])
4486     cost.cost = target_spill_cost [speed];
4487
4488   return cost;
4489 }
4490
4491 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4492    invariants the computation depends on.  */
4493
4494 static comp_cost
4495 force_var_cost (struct ivopts_data *data,
4496                 tree expr, bitmap *depends_on)
4497 {
4498   if (depends_on)
4499     {
4500       fd_ivopts_data = data;
4501       walk_tree (&expr, find_depends, depends_on, NULL);
4502     }
4503
4504   return force_expr_to_var_cost (expr, data->speed);
4505 }
4506
4507 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4508    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4509    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4510    invariants the computation depends on.  */
4511
4512 static comp_cost
4513 split_address_cost (struct ivopts_data *data,
4514                     tree addr, bool *symbol_present, bool *var_present,
4515                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4516 {
4517   tree core;
4518   HOST_WIDE_INT bitsize;
4519   HOST_WIDE_INT bitpos;
4520   tree toffset;
4521   machine_mode mode;
4522   int unsignedp, reversep, volatilep;
4523
4524   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4525                               &unsignedp, &reversep, &volatilep, false);
4526
4527   if (toffset != 0
4528       || bitpos % BITS_PER_UNIT != 0
4529       || reversep
4530       || TREE_CODE (core) != VAR_DECL)
4531     {
4532       *symbol_present = false;
4533       *var_present = true;
4534       fd_ivopts_data = data;
4535       if (depends_on)
4536         walk_tree (&addr, find_depends, depends_on, NULL);
4537
4538       return comp_cost (target_spill_cost[data->speed], 0);
4539     }
4540
4541   *offset += bitpos / BITS_PER_UNIT;
4542   if (TREE_STATIC (core)
4543       || DECL_EXTERNAL (core))
4544     {
4545       *symbol_present = true;
4546       *var_present = false;
4547       return no_cost;
4548     }
4549
4550   *symbol_present = false;
4551   *var_present = true;
4552   return no_cost;
4553 }
4554
4555 /* Estimates cost of expressing difference of addresses E1 - E2 as
4556    var + symbol + offset.  The value of offset is added to OFFSET,
4557    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4558    part is missing.  DEPENDS_ON is a set of the invariants the computation
4559    depends on.  */
4560
4561 static comp_cost
4562 ptr_difference_cost (struct ivopts_data *data,
4563                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4564                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4565 {
4566   HOST_WIDE_INT diff = 0;
4567   aff_tree aff_e1, aff_e2;
4568   tree type;
4569
4570   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4571
4572   if (ptr_difference_const (e1, e2, &diff))
4573     {
4574       *offset += diff;
4575       *symbol_present = false;
4576       *var_present = false;
4577       return no_cost;
4578     }
4579
4580   if (integer_zerop (e2))
4581     return split_address_cost (data, TREE_OPERAND (e1, 0),
4582                                symbol_present, var_present, offset, depends_on);
4583
4584   *symbol_present = false;
4585   *var_present = true;
4586
4587   type = signed_type_for (TREE_TYPE (e1));
4588   tree_to_aff_combination (e1, type, &aff_e1);
4589   tree_to_aff_combination (e2, type, &aff_e2);
4590   aff_combination_scale (&aff_e2, -1);
4591   aff_combination_add (&aff_e1, &aff_e2);
4592
4593   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4594 }
4595
4596 /* Estimates cost of expressing difference E1 - E2 as
4597    var + symbol + offset.  The value of offset is added to OFFSET,
4598    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4599    part is missing.  DEPENDS_ON is a set of the invariants the computation
4600    depends on.  */
4601
4602 static comp_cost
4603 difference_cost (struct ivopts_data *data,
4604                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4605                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4606 {
4607   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4608   unsigned HOST_WIDE_INT off1, off2;
4609   aff_tree aff_e1, aff_e2;
4610   tree type;
4611
4612   e1 = strip_offset (e1, &off1);
4613   e2 = strip_offset (e2, &off2);
4614   *offset += off1 - off2;
4615
4616   STRIP_NOPS (e1);
4617   STRIP_NOPS (e2);
4618
4619   if (TREE_CODE (e1) == ADDR_EXPR)
4620     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4621                                 offset, depends_on);
4622   *symbol_present = false;
4623
4624   if (operand_equal_p (e1, e2, 0))
4625     {
4626       *var_present = false;
4627       return no_cost;
4628     }
4629
4630   *var_present = true;
4631
4632   if (integer_zerop (e2))
4633     return force_var_cost (data, e1, depends_on);
4634
4635   if (integer_zerop (e1))
4636     {
4637       comp_cost cost = force_var_cost (data, e2, depends_on);
4638       cost += mult_by_coeff_cost (-1, mode, data->speed);
4639       return cost;
4640     }
4641
4642   type = signed_type_for (TREE_TYPE (e1));
4643   tree_to_aff_combination (e1, type, &aff_e1);
4644   tree_to_aff_combination (e2, type, &aff_e2);
4645   aff_combination_scale (&aff_e2, -1);
4646   aff_combination_add (&aff_e1, &aff_e2);
4647
4648   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4649 }
4650
4651 /* Returns true if AFF1 and AFF2 are identical.  */
4652
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4655 {
4656   unsigned i;
4657
4658   if (aff1->n != aff2->n)
4659     return false;
4660
4661   for (i = 0; i < aff1->n; i++)
4662     {
4663       if (aff1->elts[i].coef != aff2->elts[i].coef)
4664         return false;
4665
4666       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667         return false;
4668     }
4669   return true;
4670 }
4671
4672 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4673
4674 static iv_inv_expr_ent *
4675 record_inv_expr (struct ivopts_data *data, tree expr)
4676 {
4677   struct iv_inv_expr_ent ent;
4678   struct iv_inv_expr_ent **slot;
4679
4680   ent.expr = expr;
4681   ent.hash = iterative_hash_expr (expr, 0);
4682   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4683
4684   if (!*slot)
4685     {
4686       *slot = XNEW (struct iv_inv_expr_ent);
4687       (*slot)->expr = expr;
4688       (*slot)->hash = ent.hash;
4689       (*slot)->id = data->max_inv_expr_id++;
4690     }
4691
4692   return *slot;
4693 }
4694
4695 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4696    requires a new compiler generated temporary.  Returns -1 otherwise.
4697    ADDRESS_P is a flag indicating if the expression is for address
4698    computation.  */
4699
4700 static iv_inv_expr_ent *
4701 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4702                          tree cbase, HOST_WIDE_INT ratio,
4703                          bool address_p)
4704 {
4705   aff_tree ubase_aff, cbase_aff;
4706   tree expr, ub, cb;
4707
4708   STRIP_NOPS (ubase);
4709   STRIP_NOPS (cbase);
4710   ub = ubase;
4711   cb = cbase;
4712
4713   if ((TREE_CODE (ubase) == INTEGER_CST)
4714       && (TREE_CODE (cbase) == INTEGER_CST))
4715     return NULL;
4716
4717   /* Strips the constant part. */
4718   if (TREE_CODE (ubase) == PLUS_EXPR
4719       || TREE_CODE (ubase) == MINUS_EXPR
4720       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4721     {
4722       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4723         ubase = TREE_OPERAND (ubase, 0);
4724     }
4725
4726   /* Strips the constant part. */
4727   if (TREE_CODE (cbase) == PLUS_EXPR
4728       || TREE_CODE (cbase) == MINUS_EXPR
4729       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4730     {
4731       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4732         cbase = TREE_OPERAND (cbase, 0);
4733     }
4734
4735   if (address_p)
4736     {
4737       if (((TREE_CODE (ubase) == SSA_NAME)
4738            || (TREE_CODE (ubase) == ADDR_EXPR
4739                && is_gimple_min_invariant (ubase)))
4740           && (TREE_CODE (cbase) == INTEGER_CST))
4741         return NULL;
4742
4743       if (((TREE_CODE (cbase) == SSA_NAME)
4744            || (TREE_CODE (cbase) == ADDR_EXPR
4745                && is_gimple_min_invariant (cbase)))
4746           && (TREE_CODE (ubase) == INTEGER_CST))
4747         return NULL;
4748     }
4749
4750   if (ratio == 1)
4751     {
4752       if (operand_equal_p (ubase, cbase, 0))
4753         return NULL;
4754
4755       if (TREE_CODE (ubase) == ADDR_EXPR
4756           && TREE_CODE (cbase) == ADDR_EXPR)
4757         {
4758           tree usym, csym;
4759
4760           usym = TREE_OPERAND (ubase, 0);
4761           csym = TREE_OPERAND (cbase, 0);
4762           if (TREE_CODE (usym) == ARRAY_REF)
4763             {
4764               tree ind = TREE_OPERAND (usym, 1);
4765               if (TREE_CODE (ind) == INTEGER_CST
4766                   && tree_fits_shwi_p (ind)
4767                   && tree_to_shwi (ind) == 0)
4768                 usym = TREE_OPERAND (usym, 0);
4769             }
4770           if (TREE_CODE (csym) == ARRAY_REF)
4771             {
4772               tree ind = TREE_OPERAND (csym, 1);
4773               if (TREE_CODE (ind) == INTEGER_CST
4774                   && tree_fits_shwi_p (ind)
4775                   && tree_to_shwi (ind) == 0)
4776                 csym = TREE_OPERAND (csym, 0);
4777             }
4778           if (operand_equal_p (usym, csym, 0))
4779             return NULL;
4780         }
4781       /* Now do more complex comparison  */
4782       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4783       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4784       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4785         return NULL;
4786     }
4787
4788   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4789   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4790
4791   aff_combination_scale (&cbase_aff, -1 * ratio);
4792   aff_combination_add (&ubase_aff, &cbase_aff);
4793   expr = aff_combination_to_tree (&ubase_aff);
4794   return record_inv_expr (data, expr);
4795 }
4796
4797 /* Scale (multiply) the computed COST (except scratch part that should be
4798    hoisted out a loop) by header->frequency / AT->frequency,
4799    which makes expected cost more accurate.  */
4800
4801 static comp_cost
4802 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
4803                                 comp_cost cost)
4804 {
4805    int loop_freq = data->current_loop->header->frequency;
4806    int bb_freq = at->bb->frequency;
4807    if (loop_freq != 0)
4808      {
4809        gcc_assert (cost.scratch <= cost.cost);
4810        int scaled_cost
4811          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4812
4813        if (dump_file && (dump_flags & TDF_DETAILS))
4814          fprintf (dump_file, "Scaling iv_use based on cand %d "
4815                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4816                   cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
4817                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4818
4819        cost.cost = scaled_cost;
4820      }
4821
4822   return cost;
4823 }
4824
4825 /* Determines the cost of the computation by that USE is expressed
4826    from induction variable CAND.  If ADDRESS_P is true, we just need
4827    to create an address from it, otherwise we want to get it into
4828    register.  A set of invariants we depend on is stored in
4829    DEPENDS_ON.  AT is the statement at that the value is computed.
4830    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4831    addressing is likely.  */
4832
4833 static comp_cost
4834 get_computation_cost_at (struct ivopts_data *data,
4835                          struct iv_use *use, struct iv_cand *cand,
4836                          bool address_p, bitmap *depends_on, gimple *at,
4837                          bool *can_autoinc,
4838                          iv_inv_expr_ent **inv_expr)
4839 {
4840   tree ubase = use->iv->base, ustep = use->iv->step;
4841   tree cbase, cstep;
4842   tree utype = TREE_TYPE (ubase), ctype;
4843   unsigned HOST_WIDE_INT cstepi, offset = 0;
4844   HOST_WIDE_INT ratio, aratio;
4845   bool var_present, symbol_present, stmt_is_after_inc;
4846   comp_cost cost;
4847   widest_int rat;
4848   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4849   machine_mode mem_mode = (address_p
4850                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4851                                 : VOIDmode);
4852
4853   if (depends_on)
4854     *depends_on = NULL;
4855
4856   /* Only consider real candidates.  */
4857   if (!cand->iv)
4858     return infinite_cost;
4859
4860   cbase = cand->iv->base;
4861   cstep = cand->iv->step;
4862   ctype = TREE_TYPE (cbase);
4863
4864   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4865     {
4866       /* We do not have a precision to express the values of use.  */
4867       return infinite_cost;
4868     }
4869
4870   if (address_p
4871       || (use->iv->base_object
4872           && cand->iv->base_object
4873           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4874           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4875     {
4876       /* Do not try to express address of an object with computation based
4877          on address of a different object.  This may cause problems in rtl
4878          level alias analysis (that does not expect this to be happening,
4879          as this is illegal in C), and would be unlikely to be useful
4880          anyway.  */
4881       if (use->iv->base_object
4882           && cand->iv->base_object
4883           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4884         return infinite_cost;
4885     }
4886
4887   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4888     {
4889       /* TODO -- add direct handling of this case.  */
4890       goto fallback;
4891     }
4892
4893   /* CSTEPI is removed from the offset in case statement is after the
4894      increment.  If the step is not constant, we use zero instead.
4895      This is a bit imprecise (there is the extra addition), but
4896      redundancy elimination is likely to transform the code so that
4897      it uses value of the variable before increment anyway,
4898      so it is not that much unrealistic.  */
4899   if (cst_and_fits_in_hwi (cstep))
4900     cstepi = int_cst_value (cstep);
4901   else
4902     cstepi = 0;
4903
4904   if (!constant_multiple_of (ustep, cstep, &rat))
4905     return infinite_cost;
4906
4907   if (wi::fits_shwi_p (rat))
4908     ratio = rat.to_shwi ();
4909   else
4910     return infinite_cost;
4911
4912   STRIP_NOPS (cbase);
4913   ctype = TREE_TYPE (cbase);
4914
4915   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4916
4917   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4918      or ratio == 1, it is better to handle this like
4919
4920      ubase - ratio * cbase + ratio * var
4921
4922      (also holds in the case ratio == -1, TODO.  */
4923
4924   if (cst_and_fits_in_hwi (cbase))
4925     {
4926       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4927       cost = difference_cost (data,
4928                               ubase, build_int_cst (utype, 0),
4929                               &symbol_present, &var_present, &offset,
4930                               depends_on);
4931       cost /= avg_loop_niter (data->current_loop);
4932     }
4933   else if (ratio == 1)
4934     {
4935       tree real_cbase = cbase;
4936
4937       /* Check to see if any adjustment is needed.  */
4938       if (cstepi == 0 && stmt_is_after_inc)
4939         {
4940           aff_tree real_cbase_aff;
4941           aff_tree cstep_aff;
4942
4943           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4944                                    &real_cbase_aff);
4945           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4946
4947           aff_combination_add (&real_cbase_aff, &cstep_aff);
4948           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4949         }
4950
4951       cost = difference_cost (data,
4952                               ubase, real_cbase,
4953                               &symbol_present, &var_present, &offset,
4954                               depends_on);
4955       cost /= avg_loop_niter (data->current_loop);
4956     }
4957   else if (address_p
4958            && !POINTER_TYPE_P (ctype)
4959            && multiplier_allowed_in_address_p
4960                 (ratio, mem_mode,
4961                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4962     {
4963       tree real_cbase = cbase;
4964
4965       if (cstepi == 0 && stmt_is_after_inc)
4966         {
4967           if (POINTER_TYPE_P (ctype))
4968             real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4969           else
4970             real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4971         }
4972       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4973                                 build_int_cst (ctype, ratio));
4974       cost = difference_cost (data,
4975                               ubase, real_cbase,
4976                               &symbol_present, &var_present, &offset,
4977                               depends_on);
4978       cost /= avg_loop_niter (data->current_loop);
4979     }
4980   else
4981     {
4982       cost = force_var_cost (data, cbase, depends_on);
4983       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4984                                &symbol_present, &var_present, &offset,
4985                                depends_on);
4986       cost /= avg_loop_niter (data->current_loop);
4987       cost += add_cost (data->speed, TYPE_MODE (ctype));
4988     }
4989
4990   /* Record setup cost in scratch field.  */
4991   cost.scratch = cost.cost;
4992
4993   if (inv_expr && depends_on && *depends_on)
4994     {
4995       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4996                                            address_p);
4997       /* Clear depends on.  */
4998       if (*inv_expr != NULL)
4999         bitmap_clear (*depends_on);
5000     }
5001
5002   /* If we are after the increment, the value of the candidate is higher by
5003      one iteration.  */
5004   if (stmt_is_after_inc)
5005     offset -= ratio * cstepi;
5006
5007   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5008      (symbol/var1/const parts may be omitted).  If we are looking for an
5009      address, find the cost of addressing this.  */
5010   if (address_p)
5011     {
5012       cost += get_address_cost (symbol_present, var_present,
5013                                 offset, ratio, cstepi,
5014                                 mem_mode,
5015                                 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5016                                 speed, stmt_is_after_inc, can_autoinc);
5017       return get_scaled_computation_cost_at (data, at, cand, cost);
5018     }
5019
5020   /* Otherwise estimate the costs for computing the expression.  */
5021   if (!symbol_present && !var_present && !offset)
5022     {
5023       if (ratio != 1)
5024         cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5025       return get_scaled_computation_cost_at (data, at, cand, cost);
5026     }
5027
5028   /* Symbol + offset should be compile-time computable so consider that they
5029       are added once to the variable, if present.  */
5030   if (var_present && (symbol_present || offset))
5031     cost += adjust_setup_cost (data,
5032                                     add_cost (speed, TYPE_MODE (ctype)));
5033
5034   /* Having offset does not affect runtime cost in case it is added to
5035      symbol, but it increases complexity.  */
5036   if (offset)
5037     cost.complexity++;
5038
5039   cost += add_cost (speed, TYPE_MODE (ctype));
5040
5041   aratio = ratio > 0 ? ratio : -ratio;
5042   if (aratio != 1)
5043     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5044
5045   return get_scaled_computation_cost_at (data, at, cand, cost);
5046
5047 fallback:
5048   if (can_autoinc)
5049     *can_autoinc = false;
5050
5051   /* Just get the expression, expand it and measure the cost.  */
5052   tree comp = get_computation_at (data->current_loop, use, cand, at);
5053
5054   if (!comp)
5055     return infinite_cost;
5056
5057   if (address_p)
5058     comp = build_simple_mem_ref (comp);
5059
5060   cost = comp_cost (computation_cost (comp, speed), 0);
5061
5062   return get_scaled_computation_cost_at (data, at, cand, cost);
5063 }
5064
5065 /* Determines the cost of the computation by that USE is expressed
5066    from induction variable CAND.  If ADDRESS_P is true, we just need
5067    to create an address from it, otherwise we want to get it into
5068    register.  A set of invariants we depend on is stored in
5069    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5070    autoinc addressing is likely.  */
5071
5072 static comp_cost
5073 get_computation_cost (struct ivopts_data *data,
5074                       struct iv_use *use, struct iv_cand *cand,
5075                       bool address_p, bitmap *depends_on,
5076                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5077 {
5078   return get_computation_cost_at (data,
5079                                   use, cand, address_p, depends_on, use->stmt,
5080                                   can_autoinc, inv_expr);
5081 }
5082
5083 /* Determines cost of computing the use in GROUP with CAND in a generic
5084    expression.  */
5085
5086 static bool
5087 determine_group_iv_cost_generic (struct ivopts_data *data,
5088                                  struct iv_group *group, struct iv_cand *cand)
5089 {
5090   comp_cost cost;
5091   iv_inv_expr_ent *inv_expr = NULL;
5092   bitmap depends_on = NULL;
5093   struct iv_use *use = group->vuses[0];
5094
5095   /* The simple case first -- if we need to express value of the preserved
5096      original biv, the cost is 0.  This also prevents us from counting the
5097      cost of increment twice -- once at this use and once in the cost of
5098      the candidate.  */
5099   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5100     cost = no_cost;
5101   else
5102     cost = get_computation_cost (data, use, cand, false,
5103                                  &depends_on, NULL, &inv_expr);
5104
5105   set_group_iv_cost (data, group, cand, cost, depends_on,
5106                      NULL_TREE, ERROR_MARK, inv_expr);
5107   return !cost.infinite_cost_p ();
5108 }
5109
5110 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5111
5112 static bool
5113 determine_group_iv_cost_address (struct ivopts_data *data,
5114                                  struct iv_group *group, struct iv_cand *cand)
5115 {
5116   unsigned i;
5117   bitmap depends_on;
5118   bool can_autoinc, first = true;
5119   iv_inv_expr_ent *inv_expr = NULL;
5120   struct iv_use *use = group->vuses[0];
5121   comp_cost sum_cost = no_cost, cost;
5122
5123   cost = get_computation_cost (data, use, cand, true,
5124                                &depends_on, &can_autoinc, &inv_expr);
5125
5126   sum_cost = cost;
5127   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5128     {
5129       if (can_autoinc)
5130         sum_cost -= cand->cost_step;
5131       /* If we generated the candidate solely for exploiting autoincrement
5132          opportunities, and it turns out it can't be used, set the cost to
5133          infinity to make sure we ignore it.  */
5134       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5135         sum_cost = infinite_cost;
5136     }
5137
5138   /* Uses in a group can share setup code, so only add setup cost once.  */
5139   cost -= cost.scratch;
5140   /* Compute and add costs for rest uses of this group.  */
5141   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5142     {
5143       struct iv_use *next = group->vuses[i];
5144
5145       /* Compute cost for the first use with different offset to the main
5146          use and add it afterwards.  Costs for these uses could be quite
5147          different.  Given below uses in a group:
5148            use 0  : {base + A + offset_0, step}
5149            use 0.1: {base + A + offset_0, step}
5150            use 0.2: {base + A + offset_1, step}
5151            use 0.3: {base + A + offset_2, step}
5152          when we need to compute costs with candidate:
5153            cand 1 : {base + B + offset_0, step}
5154
5155          The first use with different offset is use 0.2, its cost is larger
5156          than cost of use 0/0.1 because we need to compute:
5157            A - B + offset_1 - offset_0
5158            rather than:
5159            A - B.  */
5160       if (first && next->addr_offset != use->addr_offset)
5161         {
5162           first = false;
5163           cost = get_computation_cost (data, next, cand, true,
5164                                        NULL, &can_autoinc, NULL);
5165           /* Remove setup cost.  */
5166           if (!cost.infinite_cost_p ())
5167             cost -= cost.scratch;
5168         }
5169       sum_cost += cost;
5170     }
5171   set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5172                      NULL_TREE, ERROR_MARK, inv_expr);
5173
5174   return !sum_cost.infinite_cost_p ();
5175 }
5176
5177 /* Computes value of candidate CAND at position AT in iteration NITER, and
5178    stores it to VAL.  */
5179
5180 static void
5181 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5182                aff_tree *val)
5183 {
5184   aff_tree step, delta, nit;
5185   struct iv *iv = cand->iv;
5186   tree type = TREE_TYPE (iv->base);
5187   tree steptype = type;
5188   if (POINTER_TYPE_P (type))
5189     steptype = sizetype;
5190   steptype = unsigned_type_for (type);
5191
5192   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5193   aff_combination_convert (&step, steptype);
5194   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5195   aff_combination_convert (&nit, steptype);
5196   aff_combination_mult (&nit, &step, &delta);
5197   if (stmt_after_increment (loop, cand, at))
5198     aff_combination_add (&delta, &step);
5199
5200   tree_to_aff_combination (iv->base, type, val);
5201   if (!POINTER_TYPE_P (type))
5202     aff_combination_convert (val, steptype);
5203   aff_combination_add (val, &delta);
5204 }
5205
5206 /* Returns period of induction variable iv.  */
5207
5208 static tree
5209 iv_period (struct iv *iv)
5210 {
5211   tree step = iv->step, period, type;
5212   tree pow2div;
5213
5214   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5215
5216   type = unsigned_type_for (TREE_TYPE (step));
5217   /* Period of the iv is lcm (step, type_range)/step -1,
5218      i.e., N*type_range/step - 1. Since type range is power
5219      of two, N == (step >> num_of_ending_zeros_binary (step),
5220      so the final result is
5221
5222        (type_range >> num_of_ending_zeros_binary (step)) - 1
5223
5224   */
5225   pow2div = num_ending_zeros (step);
5226
5227   period = build_low_bits_mask (type,
5228                                 (TYPE_PRECISION (type)
5229                                  - tree_to_uhwi (pow2div)));
5230
5231   return period;
5232 }
5233
5234 /* Returns the comparison operator used when eliminating the iv USE.  */
5235
5236 static enum tree_code
5237 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5238 {
5239   struct loop *loop = data->current_loop;
5240   basic_block ex_bb;
5241   edge exit;
5242
5243   ex_bb = gimple_bb (use->stmt);
5244   exit = EDGE_SUCC (ex_bb, 0);
5245   if (flow_bb_inside_loop_p (loop, exit->dest))
5246     exit = EDGE_SUCC (ex_bb, 1);
5247
5248   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5249 }
5250
5251 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5252    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5253    calculation is performed in non-wrapping type.
5254
5255    TODO: More generally, we could test for the situation that
5256          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5257          This would require knowing the sign of OFFSET.  */
5258
5259 static bool
5260 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5261 {
5262   enum tree_code code;
5263   tree e1, e2;
5264   aff_tree aff_e1, aff_e2, aff_offset;
5265
5266   if (!nowrap_type_p (TREE_TYPE (base)))
5267     return false;
5268
5269   base = expand_simple_operations (base);
5270
5271   if (TREE_CODE (base) == SSA_NAME)
5272     {
5273       gimple *stmt = SSA_NAME_DEF_STMT (base);
5274
5275       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5276         return false;
5277
5278       code = gimple_assign_rhs_code (stmt);
5279       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5280         return false;
5281
5282       e1 = gimple_assign_rhs1 (stmt);
5283       e2 = gimple_assign_rhs2 (stmt);
5284     }
5285   else
5286     {
5287       code = TREE_CODE (base);
5288       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5289         return false;
5290       e1 = TREE_OPERAND (base, 0);
5291       e2 = TREE_OPERAND (base, 1);
5292     }
5293
5294   /* Use affine expansion as deeper inspection to prove the equality.  */
5295   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5296                                   &aff_e2, &data->name_expansion_cache);
5297   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5298                                   &aff_offset, &data->name_expansion_cache);
5299   aff_combination_scale (&aff_offset, -1);
5300   switch (code)
5301     {
5302     case PLUS_EXPR:
5303       aff_combination_add (&aff_e2, &aff_offset);
5304       if (aff_combination_zero_p (&aff_e2))
5305         return true;
5306
5307       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5308                                       &aff_e1, &data->name_expansion_cache);
5309       aff_combination_add (&aff_e1, &aff_offset);
5310       return aff_combination_zero_p (&aff_e1);
5311
5312     case POINTER_PLUS_EXPR:
5313       aff_combination_add (&aff_e2, &aff_offset);
5314       return aff_combination_zero_p (&aff_e2);
5315
5316     default:
5317       return false;
5318     }
5319 }
5320
5321 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5322    comparison with CAND.  NITER describes the number of iterations of
5323    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5324
5325    We aim to handle the following situation:
5326
5327    sometype *base, *p;
5328    int a, b, i;
5329
5330    i = a;
5331    p = p_0 = base + a;
5332
5333    do
5334      {
5335        bla (*p);
5336        p++;
5337        i++;
5338      }
5339    while (i < b);
5340
5341    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5342    We aim to optimize this to
5343
5344    p = p_0 = base + a;
5345    do
5346      {
5347        bla (*p);
5348        p++;
5349      }
5350    while (p < p_0 - a + b);
5351
5352    This preserves the correctness, since the pointer arithmetics does not
5353    overflow.  More precisely:
5354
5355    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5356       overflow in computing it or the values of p.
5357    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5358       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5359
5360 static bool
5361 iv_elimination_compare_lt (struct ivopts_data *data,
5362                            struct iv_cand *cand, enum tree_code *comp_p,
5363                            struct tree_niter_desc *niter)
5364 {
5365   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5366   struct aff_tree nit, tmpa, tmpb;
5367   enum tree_code comp;
5368   HOST_WIDE_INT step;
5369
5370   /* We need to know that the candidate induction variable does not overflow.
5371      While more complex analysis may be used to prove this, for now just
5372      check that the variable appears in the original program and that it
5373      is computed in a type that guarantees no overflows.  */
5374   cand_type = TREE_TYPE (cand->iv->base);
5375   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5376     return false;
5377
5378   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5379      the calculation of the BOUND could overflow, making the comparison
5380      invalid.  */
5381   if (!data->loop_single_exit_p)
5382     return false;
5383
5384   /* We need to be able to decide whether candidate is increasing or decreasing
5385      in order to choose the right comparison operator.  */
5386   if (!cst_and_fits_in_hwi (cand->iv->step))
5387     return false;
5388   step = int_cst_value (cand->iv->step);
5389
5390   /* Check that the number of iterations matches the expected pattern:
5391      a + 1 > b ? 0 : b - a - 1.  */
5392   mbz = niter->may_be_zero;
5393   if (TREE_CODE (mbz) == GT_EXPR)
5394     {
5395       /* Handle a + 1 > b.  */
5396       tree op0 = TREE_OPERAND (mbz, 0);
5397       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5398         {
5399           a = TREE_OPERAND (op0, 0);
5400           b = TREE_OPERAND (mbz, 1);
5401         }
5402       else
5403         return false;
5404     }
5405   else if (TREE_CODE (mbz) == LT_EXPR)
5406     {
5407       tree op1 = TREE_OPERAND (mbz, 1);
5408
5409       /* Handle b < a + 1.  */
5410       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5411         {
5412           a = TREE_OPERAND (op1, 0);
5413           b = TREE_OPERAND (mbz, 0);
5414         }
5415       else
5416         return false;
5417     }
5418   else
5419     return false;
5420
5421   /* Expected number of iterations is B - A - 1.  Check that it matches
5422      the actual number, i.e., that B - A - NITER = 1.  */
5423   tree_to_aff_combination (niter->niter, nit_type, &nit);
5424   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5425   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5426   aff_combination_scale (&nit, -1);
5427   aff_combination_scale (&tmpa, -1);
5428   aff_combination_add (&tmpb, &tmpa);
5429   aff_combination_add (&tmpb, &nit);
5430   if (tmpb.n != 0 || tmpb.offset != 1)
5431     return false;
5432
5433   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5434      overflow.  */
5435   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5436                         cand->iv->step,
5437                         fold_convert (TREE_TYPE (cand->iv->step), a));
5438   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5439     return false;
5440
5441   /* Determine the new comparison operator.  */
5442   comp = step < 0 ? GT_EXPR : LT_EXPR;
5443   if (*comp_p == NE_EXPR)
5444     *comp_p = comp;
5445   else if (*comp_p == EQ_EXPR)
5446     *comp_p = invert_tree_comparison (comp, false);
5447   else
5448     gcc_unreachable ();
5449
5450   return true;
5451 }
5452
5453 /* Check whether it is possible to express the condition in USE by comparison
5454    of candidate CAND.  If so, store the value compared with to BOUND, and the
5455    comparison operator to COMP.  */
5456
5457 static bool
5458 may_eliminate_iv (struct ivopts_data *data,
5459                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5460                   enum tree_code *comp)
5461 {
5462   basic_block ex_bb;
5463   edge exit;
5464   tree period;
5465   struct loop *loop = data->current_loop;
5466   aff_tree bnd;
5467   struct tree_niter_desc *desc = NULL;
5468
5469   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5470     return false;
5471
5472   /* For now works only for exits that dominate the loop latch.
5473      TODO: extend to other conditions inside loop body.  */
5474   ex_bb = gimple_bb (use->stmt);
5475   if (use->stmt != last_stmt (ex_bb)
5476       || gimple_code (use->stmt) != GIMPLE_COND
5477       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5478     return false;
5479
5480   exit = EDGE_SUCC (ex_bb, 0);
5481   if (flow_bb_inside_loop_p (loop, exit->dest))
5482     exit = EDGE_SUCC (ex_bb, 1);
5483   if (flow_bb_inside_loop_p (loop, exit->dest))
5484     return false;
5485
5486   desc = niter_for_exit (data, exit);
5487   if (!desc)
5488     return false;
5489
5490   /* Determine whether we can use the variable to test the exit condition.
5491      This is the case iff the period of the induction variable is greater
5492      than the number of iterations for which the exit condition is true.  */
5493   period = iv_period (cand->iv);
5494
5495   /* If the number of iterations is constant, compare against it directly.  */
5496   if (TREE_CODE (desc->niter) == INTEGER_CST)
5497     {
5498       /* See cand_value_at.  */
5499       if (stmt_after_increment (loop, cand, use->stmt))
5500         {
5501           if (!tree_int_cst_lt (desc->niter, period))
5502             return false;
5503         }
5504       else
5505         {
5506           if (tree_int_cst_lt (period, desc->niter))
5507             return false;
5508         }
5509     }
5510
5511   /* If not, and if this is the only possible exit of the loop, see whether
5512      we can get a conservative estimate on the number of iterations of the
5513      entire loop and compare against that instead.  */
5514   else
5515     {
5516       widest_int period_value, max_niter;
5517
5518       max_niter = desc->max;
5519       if (stmt_after_increment (loop, cand, use->stmt))
5520         max_niter += 1;
5521       period_value = wi::to_widest (period);
5522       if (wi::gtu_p (max_niter, period_value))
5523         {
5524           /* See if we can take advantage of inferred loop bound
5525              information.  */
5526           if (data->loop_single_exit_p)
5527             {
5528               if (!max_loop_iterations (loop, &max_niter))
5529                 return false;
5530               /* The loop bound is already adjusted by adding 1.  */
5531               if (wi::gtu_p (max_niter, period_value))
5532                 return false;
5533             }
5534           else
5535             return false;
5536         }
5537     }
5538
5539   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5540
5541   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5542                          aff_combination_to_tree (&bnd));
5543   *comp = iv_elimination_compare (data, use);
5544
5545   /* It is unlikely that computing the number of iterations using division
5546      would be more profitable than keeping the original induction variable.  */
5547   if (expression_expensive_p (*bound))
5548     return false;
5549
5550   /* Sometimes, it is possible to handle the situation that the number of
5551      iterations may be zero unless additional assumtions by using <
5552      instead of != in the exit condition.
5553
5554      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5555            base the exit condition on it.  However, that is often too
5556            expensive.  */
5557   if (!integer_zerop (desc->may_be_zero))
5558     return iv_elimination_compare_lt (data, cand, comp, desc);
5559
5560   return true;
5561 }
5562
5563  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5564     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5565
5566 static int
5567 parm_decl_cost (struct ivopts_data *data, tree bound)
5568 {
5569   tree sbound = bound;
5570   STRIP_NOPS (sbound);
5571
5572   if (TREE_CODE (sbound) == SSA_NAME
5573       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5574       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5575       && data->body_includes_call)
5576     return COSTS_N_INSNS (1);
5577
5578   return 0;
5579 }
5580
5581 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5582
5583 static bool
5584 determine_group_iv_cost_cond (struct ivopts_data *data,
5585                               struct iv_group *group, struct iv_cand *cand)
5586 {
5587   tree bound = NULL_TREE;
5588   struct iv *cmp_iv;
5589   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5590   comp_cost elim_cost, express_cost, cost, bound_cost;
5591   bool ok;
5592   iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5593   tree *control_var, *bound_cst;
5594   enum tree_code comp = ERROR_MARK;
5595   struct iv_use *use = group->vuses[0];
5596
5597   gcc_assert (cand->iv);
5598
5599   /* Try iv elimination.  */
5600   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5601     {
5602       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5603       if (elim_cost.cost == 0)
5604         elim_cost.cost = parm_decl_cost (data, bound);
5605       else if (TREE_CODE (bound) == INTEGER_CST)
5606         elim_cost.cost = 0;
5607       /* If we replace a loop condition 'i < n' with 'p < base + n',
5608          depends_on_elim will have 'base' and 'n' set, which implies
5609          that both 'base' and 'n' will be live during the loop.  More likely,
5610          'base + n' will be loop invariant, resulting in only one live value
5611          during the loop.  So in that case we clear depends_on_elim and set
5612         elim_inv_expr_id instead.  */
5613       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5614         {
5615           elim_inv_expr = record_inv_expr (data, bound);
5616           bitmap_clear (depends_on_elim);
5617         }
5618       /* The bound is a loop invariant, so it will be only computed
5619          once.  */
5620       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5621     }
5622   else
5623     elim_cost = infinite_cost;
5624
5625   /* Try expressing the original giv.  If it is compared with an invariant,
5626      note that we cannot get rid of it.  */
5627   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5628                               NULL, &cmp_iv);
5629   gcc_assert (ok);
5630
5631   /* When the condition is a comparison of the candidate IV against
5632      zero, prefer this IV.
5633
5634      TODO: The constant that we're subtracting from the cost should
5635      be target-dependent.  This information should be added to the
5636      target costs for each backend.  */
5637   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5638       && integer_zerop (*bound_cst)
5639       && (operand_equal_p (*control_var, cand->var_after, 0)
5640           || operand_equal_p (*control_var, cand->var_before, 0)))
5641     elim_cost -= 1;
5642
5643   express_cost = get_computation_cost (data, use, cand, false,
5644                                        &depends_on_express, NULL,
5645                                        &express_inv_expr);
5646   fd_ivopts_data = data;
5647   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5648
5649   /* Count the cost of the original bound as well.  */
5650   bound_cost = force_var_cost (data, *bound_cst, NULL);
5651   if (bound_cost.cost == 0)
5652     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5653   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5654     bound_cost.cost = 0;
5655   express_cost += bound_cost;
5656
5657   /* Choose the better approach, preferring the eliminated IV. */
5658   if (elim_cost <= express_cost)
5659     {
5660       cost = elim_cost;
5661       depends_on = depends_on_elim;
5662       depends_on_elim = NULL;
5663       inv_expr = elim_inv_expr;
5664     }
5665   else
5666     {
5667       cost = express_cost;
5668       depends_on = depends_on_express;
5669       depends_on_express = NULL;
5670       bound = NULL_TREE;
5671       comp = ERROR_MARK;
5672       inv_expr = express_inv_expr;
5673     }
5674
5675   set_group_iv_cost (data, group, cand, cost,
5676                      depends_on, bound, comp, inv_expr);
5677
5678   if (depends_on_elim)
5679     BITMAP_FREE (depends_on_elim);
5680   if (depends_on_express)
5681     BITMAP_FREE (depends_on_express);
5682
5683   return !cost.infinite_cost_p ();
5684 }
5685
5686 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5687    if USE cannot be represented with CAND.  */
5688
5689 static bool
5690 determine_group_iv_cost (struct ivopts_data *data,
5691                          struct iv_group *group, struct iv_cand *cand)
5692 {
5693   switch (group->type)
5694     {
5695     case USE_NONLINEAR_EXPR:
5696       return determine_group_iv_cost_generic (data, group, cand);
5697
5698     case USE_ADDRESS:
5699       return determine_group_iv_cost_address (data, group, cand);
5700
5701     case USE_COMPARE:
5702       return determine_group_iv_cost_cond (data, group, cand);
5703
5704     default:
5705       gcc_unreachable ();
5706     }
5707 }
5708
5709 /* Return true if get_computation_cost indicates that autoincrement is
5710    a possibility for the pair of USE and CAND, false otherwise.  */
5711
5712 static bool
5713 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5714                            struct iv_cand *cand)
5715 {
5716   bitmap depends_on;
5717   bool can_autoinc;
5718   comp_cost cost;
5719
5720   if (use->type != USE_ADDRESS)
5721     return false;
5722
5723   cost = get_computation_cost (data, use, cand, true, &depends_on,
5724                                &can_autoinc, NULL);
5725
5726   BITMAP_FREE (depends_on);
5727
5728   return !cost.infinite_cost_p () && can_autoinc;
5729 }
5730
5731 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5732    use that allows autoincrement, and set their AINC_USE if possible.  */
5733
5734 static void
5735 set_autoinc_for_original_candidates (struct ivopts_data *data)
5736 {
5737   unsigned i, j;
5738
5739   for (i = 0; i < data->vcands.length (); i++)
5740     {
5741       struct iv_cand *cand = data->vcands[i];
5742       struct iv_use *closest_before = NULL;
5743       struct iv_use *closest_after = NULL;
5744       if (cand->pos != IP_ORIGINAL)
5745         continue;
5746
5747       for (j = 0; j < data->vgroups.length (); j++)
5748         {
5749           struct iv_group *group = data->vgroups[j];
5750           struct iv_use *use = group->vuses[0];
5751           unsigned uid = gimple_uid (use->stmt);
5752
5753           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5754             continue;
5755
5756           if (uid < gimple_uid (cand->incremented_at)
5757               && (closest_before == NULL
5758                   || uid > gimple_uid (closest_before->stmt)))
5759             closest_before = use;
5760
5761           if (uid > gimple_uid (cand->incremented_at)
5762               && (closest_after == NULL
5763                   || uid < gimple_uid (closest_after->stmt)))
5764             closest_after = use;
5765         }
5766
5767       if (closest_before != NULL
5768           && autoinc_possible_for_pair (data, closest_before, cand))
5769         cand->ainc_use = closest_before;
5770       else if (closest_after != NULL
5771                && autoinc_possible_for_pair (data, closest_after, cand))
5772         cand->ainc_use = closest_after;
5773     }
5774 }
5775
5776 /* Finds the candidates for the induction variables.  */
5777
5778 static void
5779 find_iv_candidates (struct ivopts_data *data)
5780 {
5781   /* Add commonly used ivs.  */
5782   add_standard_iv_candidates (data);
5783
5784   /* Add old induction variables.  */
5785   add_iv_candidate_for_bivs (data);
5786
5787   /* Add induction variables derived from uses.  */
5788   add_iv_candidate_for_groups (data);
5789
5790   set_autoinc_for_original_candidates (data);
5791
5792   /* Record the important candidates.  */
5793   record_important_candidates (data);
5794
5795   if (dump_file && (dump_flags & TDF_DETAILS))
5796     {
5797       unsigned i;
5798
5799       fprintf (dump_file, "\n<Important Candidates>:\t");
5800       for (i = 0; i < data->vcands.length (); i++)
5801         if (data->vcands[i]->important)
5802           fprintf (dump_file, " %d,", data->vcands[i]->id);
5803       fprintf (dump_file, "\n");
5804
5805       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5806       for (i = 0; i < data->vgroups.length (); i++)
5807         {
5808           struct iv_group *group = data->vgroups[i];
5809
5810           if (group->related_cands)
5811             {
5812               fprintf (dump_file, "  Group %d:\t", group->id);
5813               dump_bitmap (dump_file, group->related_cands);
5814             }
5815         }
5816       fprintf (dump_file, "\n");
5817     }
5818 }
5819
5820 /* Determines costs of computing use of iv with an iv candidate.  */
5821
5822 static void
5823 determine_group_iv_costs (struct ivopts_data *data)
5824 {
5825   unsigned i, j;
5826   struct iv_cand *cand;
5827   struct iv_group *group;
5828   bitmap to_clear = BITMAP_ALLOC (NULL);
5829
5830   alloc_use_cost_map (data);
5831
5832   for (i = 0; i < data->vgroups.length (); i++)
5833     {
5834       group = data->vgroups[i];
5835
5836       if (data->consider_all_candidates)
5837         {
5838           for (j = 0; j < data->vcands.length (); j++)
5839             {
5840               cand = data->vcands[j];
5841               determine_group_iv_cost (data, group, cand);
5842             }
5843         }
5844       else
5845         {
5846           bitmap_iterator bi;
5847
5848           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5849             {
5850               cand = data->vcands[j];
5851               if (!determine_group_iv_cost (data, group, cand))
5852                 bitmap_set_bit (to_clear, j);
5853             }
5854
5855           /* Remove the candidates for that the cost is infinite from
5856              the list of related candidates.  */
5857           bitmap_and_compl_into (group->related_cands, to_clear);
5858           bitmap_clear (to_clear);
5859         }
5860     }
5861
5862   BITMAP_FREE (to_clear);
5863
5864   if (dump_file && (dump_flags & TDF_DETAILS))
5865     {
5866       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5867       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5868
5869       for (hash_table<iv_inv_expr_hasher>::iterator it
5870            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5871            ++it)
5872         list.safe_push (*it);
5873
5874       list.qsort (sort_iv_inv_expr_ent);
5875
5876       for (i = 0; i < list.length (); ++i)
5877         {
5878           fprintf (dump_file, "inv_expr %d: \t", i);
5879           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5880           fprintf (dump_file, "\n");
5881         }
5882
5883       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5884
5885       for (i = 0; i < data->vgroups.length (); i++)
5886         {
5887           group = data->vgroups[i];
5888
5889           fprintf (dump_file, "Group %d:\n", i);
5890           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5891           for (j = 0; j < group->n_map_members; j++)
5892             {
5893               if (!group->cost_map[j].cand
5894                   || group->cost_map[j].cost.infinite_cost_p ())
5895                 continue;
5896
5897               fprintf (dump_file, "  %d\t%d\t%d\t",
5898                        group->cost_map[j].cand->id,
5899                        group->cost_map[j].cost.cost,
5900                        group->cost_map[j].cost.complexity);
5901               if (group->cost_map[j].inv_expr != NULL)
5902                 fprintf (dump_file, "%d\t",
5903                          group->cost_map[j].inv_expr->id);
5904               else
5905                 fprintf (dump_file, "\t");
5906               if (group->cost_map[j].depends_on)
5907                 bitmap_print (dump_file,
5908                               group->cost_map[j].depends_on, "","");
5909               fprintf (dump_file, "\n");
5910             }
5911
5912           fprintf (dump_file, "\n");
5913         }
5914       fprintf (dump_file, "\n");
5915     }
5916 }
5917
5918 /* Determines cost of the candidate CAND.  */
5919
5920 static void
5921 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5922 {
5923   comp_cost cost_base;
5924   unsigned cost, cost_step;
5925   tree base;
5926
5927   if (!cand->iv)
5928     {
5929       cand->cost = 0;
5930       return;
5931     }
5932
5933   /* There are two costs associated with the candidate -- its increment
5934      and its initialization.  The second is almost negligible for any loop
5935      that rolls enough, so we take it just very little into account.  */
5936
5937   base = cand->iv->base;
5938   cost_base = force_var_cost (data, base, NULL);
5939   /* It will be exceptional that the iv register happens to be initialized with
5940      the proper value at no cost.  In general, there will at least be a regcopy
5941      or a const set.  */
5942   if (cost_base.cost == 0)
5943     cost_base.cost = COSTS_N_INSNS (1);
5944   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5945
5946   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5947
5948   /* Prefer the original ivs unless we may gain something by replacing it.
5949      The reason is to make debugging simpler; so this is not relevant for
5950      artificial ivs created by other optimization passes.  */
5951   if (cand->pos != IP_ORIGINAL
5952       || !SSA_NAME_VAR (cand->var_before)
5953       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5954     cost++;
5955
5956   /* Prefer not to insert statements into latch unless there are some
5957      already (so that we do not create unnecessary jumps).  */
5958   if (cand->pos == IP_END
5959       && empty_block_p (ip_end_pos (data->current_loop)))
5960     cost++;
5961
5962   cand->cost = cost;
5963   cand->cost_step = cost_step;
5964 }
5965
5966 /* Determines costs of computation of the candidates.  */
5967
5968 static void
5969 determine_iv_costs (struct ivopts_data *data)
5970 {
5971   unsigned i;
5972
5973   if (dump_file && (dump_flags & TDF_DETAILS))
5974     {
5975       fprintf (dump_file, "<Candidate Costs>:\n");
5976       fprintf (dump_file, "  cand\tcost\n");
5977     }
5978
5979   for (i = 0; i < data->vcands.length (); i++)
5980     {
5981       struct iv_cand *cand = data->vcands[i];
5982
5983       determine_iv_cost (data, cand);
5984
5985       if (dump_file && (dump_flags & TDF_DETAILS))
5986         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5987     }
5988
5989   if (dump_file && (dump_flags & TDF_DETAILS))
5990     fprintf (dump_file, "\n");
5991 }
5992
5993 /* Calculates cost for having SIZE induction variables.  */
5994
5995 static unsigned
5996 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5997 {
5998   /* We add size to the cost, so that we prefer eliminating ivs
5999      if possible.  */
6000   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
6001                                             data->body_includes_call);
6002 }
6003
6004 /* For each size of the induction variable set determine the penalty.  */
6005
6006 static void
6007 determine_set_costs (struct ivopts_data *data)
6008 {
6009   unsigned j, n;
6010   gphi *phi;
6011   gphi_iterator psi;
6012   tree op;
6013   struct loop *loop = data->current_loop;
6014   bitmap_iterator bi;
6015
6016   if (dump_file && (dump_flags & TDF_DETAILS))
6017     {
6018       fprintf (dump_file, "<Global Costs>:\n");
6019       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6020       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6021       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6022       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6023     }
6024
6025   n = 0;
6026   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6027     {
6028       phi = psi.phi ();
6029       op = PHI_RESULT (phi);
6030
6031       if (virtual_operand_p (op))
6032         continue;
6033
6034       if (get_iv (data, op))
6035         continue;
6036
6037       n++;
6038     }
6039
6040   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6041     {
6042       struct version_info *info = ver_info (data, j);
6043
6044       if (info->inv_id && info->has_nonlin_use)
6045         n++;
6046     }
6047
6048   data->regs_used = n;
6049   if (dump_file && (dump_flags & TDF_DETAILS))
6050     fprintf (dump_file, "  regs_used %d\n", n);
6051
6052   if (dump_file && (dump_flags & TDF_DETAILS))
6053     {
6054       fprintf (dump_file, "  cost for size:\n");
6055       fprintf (dump_file, "  ivs\tcost\n");
6056       for (j = 0; j <= 2 * target_avail_regs; j++)
6057         fprintf (dump_file, "  %d\t%d\n", j,
6058                  ivopts_global_cost_for_size (data, j));
6059       fprintf (dump_file, "\n");
6060     }
6061 }
6062
6063 /* Returns true if A is a cheaper cost pair than B.  */
6064
6065 static bool
6066 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6067 {
6068   if (!a)
6069     return false;
6070
6071   if (!b)
6072     return true;
6073
6074   if (a->cost < b->cost)
6075     return true;
6076
6077   if (b->cost < a->cost)
6078     return false;
6079
6080   /* In case the costs are the same, prefer the cheaper candidate.  */
6081   if (a->cand->cost < b->cand->cost)
6082     return true;
6083
6084   return false;
6085 }
6086
6087
6088 /* Returns candidate by that USE is expressed in IVS.  */
6089
6090 static struct cost_pair *
6091 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6092 {
6093   return ivs->cand_for_group[group->id];
6094 }
6095
6096 /* Computes the cost field of IVS structure.  */
6097
6098 static void
6099 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6100 {
6101   comp_cost cost = ivs->cand_use_cost;
6102
6103   cost += ivs->cand_cost;
6104
6105   cost += ivopts_global_cost_for_size (data,
6106                                        ivs->n_regs
6107                                        + ivs->used_inv_exprs->elements ());
6108
6109   ivs->cost = cost;
6110 }
6111
6112 /* Remove invariants in set INVS to set IVS.  */
6113
6114 static void
6115 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6116 {
6117   bitmap_iterator bi;
6118   unsigned iid;
6119
6120   if (!invs)
6121     return;
6122
6123   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6124     {
6125       ivs->n_invariant_uses[iid]--;
6126       if (ivs->n_invariant_uses[iid] == 0)
6127         ivs->n_regs--;
6128     }
6129 }
6130
6131 /* Set USE not to be expressed by any candidate in IVS.  */
6132
6133 static void
6134 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6135                  struct iv_group *group)
6136 {
6137   unsigned gid = group->id, cid;
6138   struct cost_pair *cp;
6139
6140   cp = ivs->cand_for_group[gid];
6141   if (!cp)
6142     return;
6143   cid = cp->cand->id;
6144
6145   ivs->bad_groups++;
6146   ivs->cand_for_group[gid] = NULL;
6147   ivs->n_cand_uses[cid]--;
6148
6149   if (ivs->n_cand_uses[cid] == 0)
6150     {
6151       bitmap_clear_bit (ivs->cands, cid);
6152       /* Do not count the pseudocandidates.  */
6153       if (cp->cand->iv)
6154         ivs->n_regs--;
6155       ivs->n_cands--;
6156       ivs->cand_cost -= cp->cand->cost;
6157
6158       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6159     }
6160
6161   ivs->cand_use_cost -= cp->cost;
6162
6163   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6164
6165   if (cp->inv_expr != NULL)
6166     {
6167       unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6168       --(*slot);
6169       if (*slot == 0)
6170         ivs->used_inv_exprs->remove (cp->inv_expr);
6171     }
6172   iv_ca_recount_cost (data, ivs);
6173 }
6174
6175 /* Add invariants in set INVS to set IVS.  */
6176
6177 static void
6178 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6179 {
6180   bitmap_iterator bi;
6181   unsigned iid;
6182
6183   if (!invs)
6184     return;
6185
6186   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6187     {
6188       ivs->n_invariant_uses[iid]++;
6189       if (ivs->n_invariant_uses[iid] == 1)
6190         ivs->n_regs++;
6191     }
6192 }
6193
6194 /* Set cost pair for GROUP in set IVS to CP.  */
6195
6196 static void
6197 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6198               struct iv_group *group, struct cost_pair *cp)
6199 {
6200   unsigned gid = group->id, cid;
6201
6202   if (ivs->cand_for_group[gid] == cp)
6203     return;
6204
6205   if (ivs->cand_for_group[gid])
6206     iv_ca_set_no_cp (data, ivs, group);
6207
6208   if (cp)
6209     {
6210       cid = cp->cand->id;
6211
6212       ivs->bad_groups--;
6213       ivs->cand_for_group[gid] = cp;
6214       ivs->n_cand_uses[cid]++;
6215       if (ivs->n_cand_uses[cid] == 1)
6216         {
6217           bitmap_set_bit (ivs->cands, cid);
6218           /* Do not count the pseudocandidates.  */
6219           if (cp->cand->iv)
6220             ivs->n_regs++;
6221           ivs->n_cands++;
6222           ivs->cand_cost += cp->cand->cost;
6223
6224           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6225         }
6226
6227       ivs->cand_use_cost += cp->cost;
6228       iv_ca_set_add_invariants (ivs, cp->depends_on);
6229
6230       if (cp->inv_expr != NULL)
6231         {
6232           unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6233           ++(*slot);
6234         }
6235       iv_ca_recount_cost (data, ivs);
6236     }
6237 }
6238
6239 /* Extend set IVS by expressing USE by some of the candidates in it
6240    if possible.  Consider all important candidates if candidates in
6241    set IVS don't give any result.  */
6242
6243 static void
6244 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6245                struct iv_group *group)
6246 {
6247   struct cost_pair *best_cp = NULL, *cp;
6248   bitmap_iterator bi;
6249   unsigned i;
6250   struct iv_cand *cand;
6251
6252   gcc_assert (ivs->upto >= group->id);
6253   ivs->upto++;
6254   ivs->bad_groups++;
6255
6256   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6257     {
6258       cand = data->vcands[i];
6259       cp = get_group_iv_cost (data, group, cand);
6260       if (cheaper_cost_pair (cp, best_cp))
6261         best_cp = cp;
6262     }
6263
6264   if (best_cp == NULL)
6265     {
6266       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6267         {
6268           cand = data->vcands[i];
6269           cp = get_group_iv_cost (data, group, cand);
6270           if (cheaper_cost_pair (cp, best_cp))
6271             best_cp = cp;
6272         }
6273     }
6274
6275   iv_ca_set_cp (data, ivs, group, best_cp);
6276 }
6277
6278 /* Get cost for assignment IVS.  */
6279
6280 static comp_cost
6281 iv_ca_cost (struct iv_ca *ivs)
6282 {
6283   /* This was a conditional expression but it triggered a bug in
6284      Sun C 5.5.  */
6285   if (ivs->bad_groups)
6286     return infinite_cost;
6287   else
6288     return ivs->cost;
6289 }
6290
6291 /* Returns true if all dependences of CP are among invariants in IVS.  */
6292
6293 static bool
6294 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6295 {
6296   unsigned i;
6297   bitmap_iterator bi;
6298
6299   if (!cp->depends_on)
6300     return true;
6301
6302   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6303     {
6304       if (ivs->n_invariant_uses[i] == 0)
6305         return false;
6306     }
6307
6308   return true;
6309 }
6310
6311 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6312    it before NEXT.  */
6313
6314 static struct iv_ca_delta *
6315 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6316                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6317 {
6318   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6319
6320   change->group = group;
6321   change->old_cp = old_cp;
6322   change->new_cp = new_cp;
6323   change->next = next;
6324
6325   return change;
6326 }
6327
6328 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6329    are rewritten.  */
6330
6331 static struct iv_ca_delta *
6332 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6333 {
6334   struct iv_ca_delta *last;
6335
6336   if (!l2)
6337     return l1;
6338
6339   if (!l1)
6340     return l2;
6341
6342   for (last = l1; last->next; last = last->next)
6343     continue;
6344   last->next = l2;
6345
6346   return l1;
6347 }
6348
6349 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6350
6351 static struct iv_ca_delta *
6352 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6353 {
6354   struct iv_ca_delta *act, *next, *prev = NULL;
6355
6356   for (act = delta; act; act = next)
6357     {
6358       next = act->next;
6359       act->next = prev;
6360       prev = act;
6361
6362       std::swap (act->old_cp, act->new_cp);
6363     }
6364
6365   return prev;
6366 }
6367
6368 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6369    reverted instead.  */
6370
6371 static void
6372 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6373                     struct iv_ca_delta *delta, bool forward)
6374 {
6375   struct cost_pair *from, *to;
6376   struct iv_ca_delta *act;
6377
6378   if (!forward)
6379     delta = iv_ca_delta_reverse (delta);
6380
6381   for (act = delta; act; act = act->next)
6382     {
6383       from = act->old_cp;
6384       to = act->new_cp;
6385       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6386       iv_ca_set_cp (data, ivs, act->group, to);
6387     }
6388
6389   if (!forward)
6390     iv_ca_delta_reverse (delta);
6391 }
6392
6393 /* Returns true if CAND is used in IVS.  */
6394
6395 static bool
6396 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6397 {
6398   return ivs->n_cand_uses[cand->id] > 0;
6399 }
6400
6401 /* Returns number of induction variable candidates in the set IVS.  */
6402
6403 static unsigned
6404 iv_ca_n_cands (struct iv_ca *ivs)
6405 {
6406   return ivs->n_cands;
6407 }
6408
6409 /* Free the list of changes DELTA.  */
6410
6411 static void
6412 iv_ca_delta_free (struct iv_ca_delta **delta)
6413 {
6414   struct iv_ca_delta *act, *next;
6415
6416   for (act = *delta; act; act = next)
6417     {
6418       next = act->next;
6419       free (act);
6420     }
6421
6422   *delta = NULL;
6423 }
6424
6425 /* Allocates new iv candidates assignment.  */
6426
6427 static struct iv_ca *
6428 iv_ca_new (struct ivopts_data *data)
6429 {
6430   struct iv_ca *nw = XNEW (struct iv_ca);
6431
6432   nw->upto = 0;
6433   nw->bad_groups = 0;
6434   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6435                                  data->vgroups.length ());
6436   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6437   nw->cands = BITMAP_ALLOC (NULL);
6438   nw->n_cands = 0;
6439   nw->n_regs = 0;
6440   nw->cand_use_cost = no_cost;
6441   nw->cand_cost = 0;
6442   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6443   nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6444   nw->cost = no_cost;
6445
6446   return nw;
6447 }
6448
6449 /* Free memory occupied by the set IVS.  */
6450
6451 static void
6452 iv_ca_free (struct iv_ca **ivs)
6453 {
6454   free ((*ivs)->cand_for_group);
6455   free ((*ivs)->n_cand_uses);
6456   BITMAP_FREE ((*ivs)->cands);
6457   free ((*ivs)->n_invariant_uses);
6458   delete ((*ivs)->used_inv_exprs);
6459   free (*ivs);
6460   *ivs = NULL;
6461 }
6462
6463 /* Dumps IVS to FILE.  */
6464
6465 static void
6466 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6467 {
6468   unsigned i;
6469   comp_cost cost = iv_ca_cost (ivs);
6470
6471   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6472            cost.complexity);
6473   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6474            ivs->cand_cost, ivs->cand_use_cost.cost,
6475            ivs->cand_use_cost.complexity);
6476   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6477
6478   for (i = 0; i < ivs->upto; i++)
6479     {
6480       struct iv_group *group = data->vgroups[i];
6481       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6482       if (cp)
6483         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6484                  group->id, cp->cand->id, cp->cost.cost,
6485                  cp->cost.complexity);
6486       else
6487         fprintf (file, "   group:%d --> ??\n", group->id);
6488     }
6489
6490   const char *pref = "";
6491   fprintf (file, "  invariant variables: ");
6492   for (i = 1; i <= data->max_inv_id; i++)
6493     if (ivs->n_invariant_uses[i])
6494       {
6495         fprintf (file, "%s%d", pref, i);
6496         pref = ", ";
6497       }
6498
6499   pref = "";
6500   fprintf (file, "\n  invariant expressions: ");
6501   for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6502        = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6503     {
6504         fprintf (file, "%s%d", pref, (*it).first->id);
6505         pref = ", ";
6506     }
6507
6508   fprintf (file, "\n\n");
6509 }
6510
6511 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6512    new set, and store differences in DELTA.  Number of induction variables
6513    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6514    the function will try to find a solution with mimimal iv candidates.  */
6515
6516 static comp_cost
6517 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6518               struct iv_cand *cand, struct iv_ca_delta **delta,
6519               unsigned *n_ivs, bool min_ncand)
6520 {
6521   unsigned i;
6522   comp_cost cost;
6523   struct iv_group *group;
6524   struct cost_pair *old_cp, *new_cp;
6525
6526   *delta = NULL;
6527   for (i = 0; i < ivs->upto; i++)
6528     {
6529       group = data->vgroups[i];
6530       old_cp = iv_ca_cand_for_group (ivs, group);
6531
6532       if (old_cp
6533           && old_cp->cand == cand)
6534         continue;
6535
6536       new_cp = get_group_iv_cost (data, group, cand);
6537       if (!new_cp)
6538         continue;
6539
6540       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6541         continue;
6542
6543       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6544         continue;
6545
6546       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6547     }
6548
6549   iv_ca_delta_commit (data, ivs, *delta, true);
6550   cost = iv_ca_cost (ivs);
6551   if (n_ivs)
6552     *n_ivs = iv_ca_n_cands (ivs);
6553   iv_ca_delta_commit (data, ivs, *delta, false);
6554
6555   return cost;
6556 }
6557
6558 /* Try narrowing set IVS by removing CAND.  Return the cost of
6559    the new set and store the differences in DELTA.  START is
6560    the candidate with which we start narrowing.  */
6561
6562 static comp_cost
6563 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6564               struct iv_cand *cand, struct iv_cand *start,
6565               struct iv_ca_delta **delta)
6566 {
6567   unsigned i, ci;
6568   struct iv_group *group;
6569   struct cost_pair *old_cp, *new_cp, *cp;
6570   bitmap_iterator bi;
6571   struct iv_cand *cnd;
6572   comp_cost cost, best_cost, acost;
6573
6574   *delta = NULL;
6575   for (i = 0; i < data->vgroups.length (); i++)
6576     {
6577       group = data->vgroups[i];
6578
6579       old_cp = iv_ca_cand_for_group (ivs, group);
6580       if (old_cp->cand != cand)
6581         continue;
6582
6583       best_cost = iv_ca_cost (ivs);
6584       /* Start narrowing with START.  */
6585       new_cp = get_group_iv_cost (data, group, start);
6586
6587       if (data->consider_all_candidates)
6588         {
6589           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6590             {
6591               if (ci == cand->id || (start && ci == start->id))
6592                 continue;
6593
6594               cnd = data->vcands[ci];
6595
6596               cp = get_group_iv_cost (data, group, cnd);
6597               if (!cp)
6598                 continue;
6599
6600               iv_ca_set_cp (data, ivs, group, cp);
6601               acost = iv_ca_cost (ivs);
6602
6603               if (acost < best_cost)
6604                 {
6605                   best_cost = acost;
6606                   new_cp = cp;
6607                 }
6608             }
6609         }
6610       else
6611         {
6612           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6613             {
6614               if (ci == cand->id || (start && ci == start->id))
6615                 continue;
6616
6617               cnd = data->vcands[ci];
6618
6619               cp = get_group_iv_cost (data, group, cnd);
6620               if (!cp)
6621                 continue;
6622
6623               iv_ca_set_cp (data, ivs, group, cp);
6624               acost = iv_ca_cost (ivs);
6625
6626               if (acost < best_cost)
6627                 {
6628                   best_cost = acost;
6629                   new_cp = cp;
6630                 }
6631             }
6632         }
6633       /* Restore to old cp for use.  */
6634       iv_ca_set_cp (data, ivs, group, old_cp);
6635
6636       if (!new_cp)
6637         {
6638           iv_ca_delta_free (delta);
6639           return infinite_cost;
6640         }
6641
6642       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6643     }
6644
6645   iv_ca_delta_commit (data, ivs, *delta, true);
6646   cost = iv_ca_cost (ivs);
6647   iv_ca_delta_commit (data, ivs, *delta, false);
6648
6649   return cost;
6650 }
6651
6652 /* Try optimizing the set of candidates IVS by removing candidates different
6653    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6654    differences in DELTA.  */
6655
6656 static comp_cost
6657 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6658              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6659 {
6660   bitmap_iterator bi;
6661   struct iv_ca_delta *act_delta, *best_delta;
6662   unsigned i;
6663   comp_cost best_cost, acost;
6664   struct iv_cand *cand;
6665
6666   best_delta = NULL;
6667   best_cost = iv_ca_cost (ivs);
6668
6669   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6670     {
6671       cand = data->vcands[i];
6672
6673       if (cand == except_cand)
6674         continue;
6675
6676       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6677
6678       if (acost < best_cost)
6679         {
6680           best_cost = acost;
6681           iv_ca_delta_free (&best_delta);
6682           best_delta = act_delta;
6683         }
6684       else
6685         iv_ca_delta_free (&act_delta);
6686     }
6687
6688   if (!best_delta)
6689     {
6690       *delta = NULL;
6691       return best_cost;
6692     }
6693
6694   /* Recurse to possibly remove other unnecessary ivs.  */
6695   iv_ca_delta_commit (data, ivs, best_delta, true);
6696   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6697   iv_ca_delta_commit (data, ivs, best_delta, false);
6698   *delta = iv_ca_delta_join (best_delta, *delta);
6699   return best_cost;
6700 }
6701
6702 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6703    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6704    the corresponding cost_pair, otherwise just return BEST_CP.  */
6705
6706 static struct cost_pair*
6707 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6708                         unsigned int cand_idx, struct iv_cand *old_cand,
6709                         struct cost_pair *best_cp)
6710 {
6711   struct iv_cand *cand;
6712   struct cost_pair *cp;
6713
6714   gcc_assert (old_cand != NULL && best_cp != NULL);
6715   if (cand_idx == old_cand->id)
6716     return best_cp;
6717
6718   cand = data->vcands[cand_idx];
6719   cp = get_group_iv_cost (data, group, cand);
6720   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6721     return cp;
6722
6723   return best_cp;
6724 }
6725
6726 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6727    which are used by more than one iv uses.  For each of those candidates,
6728    this function tries to represent iv uses under that candidate using
6729    other ones with lower local cost, then tries to prune the new set.
6730    If the new set has lower cost, It returns the new cost after recording
6731    candidate replacement in list DELTA.  */
6732
6733 static comp_cost
6734 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6735                struct iv_ca_delta **delta)
6736 {
6737   bitmap_iterator bi, bj;
6738   unsigned int i, j, k;
6739   struct iv_cand *cand;
6740   comp_cost orig_cost, acost;
6741   struct iv_ca_delta *act_delta, *tmp_delta;
6742   struct cost_pair *old_cp, *best_cp = NULL;
6743
6744   *delta = NULL;
6745   orig_cost = iv_ca_cost (ivs);
6746
6747   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6748     {
6749       if (ivs->n_cand_uses[i] == 1
6750           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6751         continue;
6752
6753       cand = data->vcands[i];
6754
6755       act_delta = NULL;
6756       /*  Represent uses under current candidate using other ones with
6757           lower local cost.  */
6758       for (j = 0; j < ivs->upto; j++)
6759         {
6760           struct iv_group *group = data->vgroups[j];
6761           old_cp = iv_ca_cand_for_group (ivs, group);
6762
6763           if (old_cp->cand != cand)
6764             continue;
6765
6766           best_cp = old_cp;
6767           if (data->consider_all_candidates)
6768             for (k = 0; k < data->vcands.length (); k++)
6769               best_cp = cheaper_cost_with_cand (data, group, k,
6770                                                 old_cp->cand, best_cp);
6771           else
6772             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6773               best_cp = cheaper_cost_with_cand (data, group, k,
6774                                                 old_cp->cand, best_cp);
6775
6776           if (best_cp == old_cp)
6777             continue;
6778
6779           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6780         }
6781       /* No need for further prune.  */
6782       if (!act_delta)
6783         continue;
6784
6785       /* Prune the new candidate set.  */
6786       iv_ca_delta_commit (data, ivs, act_delta, true);
6787       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6788       iv_ca_delta_commit (data, ivs, act_delta, false);
6789       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6790
6791       if (acost < orig_cost)
6792         {
6793           *delta = act_delta;
6794           return acost;
6795         }
6796       else
6797         iv_ca_delta_free (&act_delta);
6798     }
6799
6800   return orig_cost;
6801 }
6802
6803 /* Tries to extend the sets IVS in the best possible way in order to
6804    express the GROUP.  If ORIGINALP is true, prefer candidates from
6805    the original set of IVs, otherwise favor important candidates not
6806    based on any memory object.  */
6807
6808 static bool
6809 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6810                   struct iv_group *group, bool originalp)
6811 {
6812   comp_cost best_cost, act_cost;
6813   unsigned i;
6814   bitmap_iterator bi;
6815   struct iv_cand *cand;
6816   struct iv_ca_delta *best_delta = NULL, *act_delta;
6817   struct cost_pair *cp;
6818
6819   iv_ca_add_group (data, ivs, group);
6820   best_cost = iv_ca_cost (ivs);
6821   cp = iv_ca_cand_for_group (ivs, group);
6822   if (cp)
6823     {
6824       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6825       iv_ca_set_no_cp (data, ivs, group);
6826     }
6827
6828   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6829      first try important candidates not based on any memory object.  Only if
6830      this fails, try the specific ones.  Rationale -- in loops with many
6831      variables the best choice often is to use just one generic biv.  If we
6832      added here many ivs specific to the uses, the optimization algorithm later
6833      would be likely to get stuck in a local minimum, thus causing us to create
6834      too many ivs.  The approach from few ivs to more seems more likely to be
6835      successful -- starting from few ivs, replacing an expensive use by a
6836      specific iv should always be a win.  */
6837   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6838     {
6839       cand = data->vcands[i];
6840
6841       if (originalp && cand->pos !=IP_ORIGINAL)
6842         continue;
6843
6844       if (!originalp && cand->iv->base_object != NULL_TREE)
6845         continue;
6846
6847       if (iv_ca_cand_used_p (ivs, cand))
6848         continue;
6849
6850       cp = get_group_iv_cost (data, group, cand);
6851       if (!cp)
6852         continue;
6853
6854       iv_ca_set_cp (data, ivs, group, cp);
6855       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6856                                true);
6857       iv_ca_set_no_cp (data, ivs, group);
6858       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6859
6860       if (act_cost < best_cost)
6861         {
6862           best_cost = act_cost;
6863
6864           iv_ca_delta_free (&best_delta);
6865           best_delta = act_delta;
6866         }
6867       else
6868         iv_ca_delta_free (&act_delta);
6869     }
6870
6871   if (best_cost.infinite_cost_p ())
6872     {
6873       for (i = 0; i < group->n_map_members; i++)
6874         {
6875           cp = group->cost_map + i;
6876           cand = cp->cand;
6877           if (!cand)
6878             continue;
6879
6880           /* Already tried this.  */
6881           if (cand->important)
6882             {
6883               if (originalp && cand->pos == IP_ORIGINAL)
6884                 continue;
6885               if (!originalp && cand->iv->base_object == NULL_TREE)
6886                 continue;
6887             }
6888
6889           if (iv_ca_cand_used_p (ivs, cand))
6890             continue;
6891
6892           act_delta = NULL;
6893           iv_ca_set_cp (data, ivs, group, cp);
6894           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6895           iv_ca_set_no_cp (data, ivs, group);
6896           act_delta = iv_ca_delta_add (group,
6897                                        iv_ca_cand_for_group (ivs, group),
6898                                        cp, act_delta);
6899
6900           if (act_cost < best_cost)
6901             {
6902               best_cost = act_cost;
6903
6904               if (best_delta)
6905                 iv_ca_delta_free (&best_delta);
6906               best_delta = act_delta;
6907             }
6908           else
6909             iv_ca_delta_free (&act_delta);
6910         }
6911     }
6912
6913   iv_ca_delta_commit (data, ivs, best_delta, true);
6914   iv_ca_delta_free (&best_delta);
6915
6916   return !best_cost.infinite_cost_p ();
6917 }
6918
6919 /* Finds an initial assignment of candidates to uses.  */
6920
6921 static struct iv_ca *
6922 get_initial_solution (struct ivopts_data *data, bool originalp)
6923 {
6924   unsigned i;
6925   struct iv_ca *ivs = iv_ca_new (data);
6926
6927   for (i = 0; i < data->vgroups.length (); i++)
6928     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6929       {
6930         iv_ca_free (&ivs);
6931         return NULL;
6932       }
6933
6934   return ivs;
6935 }
6936
6937 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6938    points to a bool variable, this function tries to break local
6939    optimal fixed-point by replacing candidates in IVS if it's true.  */
6940
6941 static bool
6942 try_improve_iv_set (struct ivopts_data *data,
6943                     struct iv_ca *ivs, bool *try_replace_p)
6944 {
6945   unsigned i, n_ivs;
6946   comp_cost acost, best_cost = iv_ca_cost (ivs);
6947   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6948   struct iv_cand *cand;
6949
6950   /* Try extending the set of induction variables by one.  */
6951   for (i = 0; i < data->vcands.length (); i++)
6952     {
6953       cand = data->vcands[i];
6954
6955       if (iv_ca_cand_used_p (ivs, cand))
6956         continue;
6957
6958       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6959       if (!act_delta)
6960         continue;
6961
6962       /* If we successfully added the candidate and the set is small enough,
6963          try optimizing it by removing other candidates.  */
6964       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6965         {
6966           iv_ca_delta_commit (data, ivs, act_delta, true);
6967           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6968           iv_ca_delta_commit (data, ivs, act_delta, false);
6969           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6970         }
6971
6972       if (acost < best_cost)
6973         {
6974           best_cost = acost;
6975           iv_ca_delta_free (&best_delta);
6976           best_delta = act_delta;
6977         }
6978       else
6979         iv_ca_delta_free (&act_delta);
6980     }
6981
6982   if (!best_delta)
6983     {
6984       /* Try removing the candidates from the set instead.  */
6985       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6986
6987       if (!best_delta && *try_replace_p)
6988         {
6989           *try_replace_p = false;
6990           /* So far candidate selecting algorithm tends to choose fewer IVs
6991              so that it can handle cases in which loops have many variables
6992              but the best choice is often to use only one general biv.  One
6993              weakness is it can't handle opposite cases, in which different
6994              candidates should be chosen with respect to each use.  To solve
6995              the problem, we replace candidates in a manner described by the
6996              comments of iv_ca_replace, thus give general algorithm a chance
6997              to break local optimal fixed-point in these cases.  */
6998           best_cost = iv_ca_replace (data, ivs, &best_delta);
6999         }
7000
7001       if (!best_delta)
7002         return false;
7003     }
7004
7005   iv_ca_delta_commit (data, ivs, best_delta, true);
7006   gcc_assert (best_cost == iv_ca_cost (ivs));
7007   iv_ca_delta_free (&best_delta);
7008   return true;
7009 }
7010
7011 /* Attempts to find the optimal set of induction variables.  We do simple
7012    greedy heuristic -- we try to replace at most one candidate in the selected
7013    solution and remove the unused ivs while this improves the cost.  */
7014
7015 static struct iv_ca *
7016 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7017 {
7018   struct iv_ca *set;
7019   bool try_replace_p = true;
7020
7021   /* Get the initial solution.  */
7022   set = get_initial_solution (data, originalp);
7023   if (!set)
7024     {
7025       if (dump_file && (dump_flags & TDF_DETAILS))
7026         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7027       return NULL;
7028     }
7029
7030   if (dump_file && (dump_flags & TDF_DETAILS))
7031     {
7032       fprintf (dump_file, "Initial set of candidates:\n");
7033       iv_ca_dump (data, dump_file, set);
7034     }
7035
7036   while (try_improve_iv_set (data, set, &try_replace_p))
7037     {
7038       if (dump_file && (dump_flags & TDF_DETAILS))
7039         {
7040           fprintf (dump_file, "Improved to:\n");
7041           iv_ca_dump (data, dump_file, set);
7042         }
7043     }
7044
7045   return set;
7046 }
7047
7048 static struct iv_ca *
7049 find_optimal_iv_set (struct ivopts_data *data)
7050 {
7051   unsigned i;
7052   comp_cost cost, origcost;
7053   struct iv_ca *set, *origset;
7054
7055   /* Determine the cost based on a strategy that starts with original IVs,
7056      and try again using a strategy that prefers candidates not based
7057      on any IVs.  */
7058   origset = find_optimal_iv_set_1 (data, true);
7059   set = find_optimal_iv_set_1 (data, false);
7060
7061   if (!origset && !set)
7062     return NULL;
7063
7064   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7065   cost = set ? iv_ca_cost (set) : infinite_cost;
7066
7067   if (dump_file && (dump_flags & TDF_DETAILS))
7068     {
7069       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7070                origcost.cost, origcost.complexity);
7071       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7072                cost.cost, cost.complexity);
7073     }
7074
7075   /* Choose the one with the best cost.  */
7076   if (origcost <= cost)
7077     {
7078       if (set)
7079         iv_ca_free (&set);
7080       set = origset;
7081     }
7082   else if (origset)
7083     iv_ca_free (&origset);
7084
7085   for (i = 0; i < data->vgroups.length (); i++)
7086     {
7087       struct iv_group *group = data->vgroups[i];
7088       group->selected = iv_ca_cand_for_group (set, group)->cand;
7089     }
7090
7091   return set;
7092 }
7093
7094 /* Creates a new induction variable corresponding to CAND.  */
7095
7096 static void
7097 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7098 {
7099   gimple_stmt_iterator incr_pos;
7100   tree base;
7101   struct iv_use *use;
7102   struct iv_group *group;
7103   bool after = false;
7104
7105   if (!cand->iv)
7106     return;
7107
7108   switch (cand->pos)
7109     {
7110     case IP_NORMAL:
7111       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7112       break;
7113
7114     case IP_END:
7115       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7116       after = true;
7117       break;
7118
7119     case IP_AFTER_USE:
7120       after = true;
7121       /* fall through */
7122     case IP_BEFORE_USE:
7123       incr_pos = gsi_for_stmt (cand->incremented_at);
7124       break;
7125
7126     case IP_ORIGINAL:
7127       /* Mark that the iv is preserved.  */
7128       name_info (data, cand->var_before)->preserve_biv = true;
7129       name_info (data, cand->var_after)->preserve_biv = true;
7130
7131       /* Rewrite the increment so that it uses var_before directly.  */
7132       use = find_interesting_uses_op (data, cand->var_after);
7133       group = data->vgroups[use->group_id];
7134       group->selected = cand;
7135       return;
7136     }
7137
7138   gimple_add_tmp_var (cand->var_before);
7139
7140   base = unshare_expr (cand->iv->base);
7141
7142   create_iv (base, unshare_expr (cand->iv->step),
7143              cand->var_before, data->current_loop,
7144              &incr_pos, after, &cand->var_before, &cand->var_after);
7145 }
7146
7147 /* Creates new induction variables described in SET.  */
7148
7149 static void
7150 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7151 {
7152   unsigned i;
7153   struct iv_cand *cand;
7154   bitmap_iterator bi;
7155
7156   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7157     {
7158       cand = data->vcands[i];
7159       create_new_iv (data, cand);
7160     }
7161
7162   if (dump_file && (dump_flags & TDF_DETAILS))
7163     {
7164       fprintf (dump_file, "Selected IV set for loop %d",
7165                data->current_loop->num);
7166       if (data->loop_loc != UNKNOWN_LOCATION)
7167         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7168                  LOCATION_LINE (data->loop_loc));
7169       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7170                avg_loop_niter (data->current_loop));
7171       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7172                (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7173       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7174       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7175         {
7176           cand = data->vcands[i];
7177           dump_cand (dump_file, cand);
7178         }
7179       fprintf (dump_file, "\n");
7180     }
7181 }
7182
7183 /* Rewrites USE (definition of iv used in a nonlinear expression)
7184    using candidate CAND.  */
7185
7186 static void
7187 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7188                             struct iv_use *use, struct iv_cand *cand)
7189 {
7190   tree comp;
7191   tree op, tgt;
7192   gassign *ass;
7193   gimple_stmt_iterator bsi;
7194
7195   /* An important special case -- if we are asked to express value of
7196      the original iv by itself, just exit; there is no need to
7197      introduce a new computation (that might also need casting the
7198      variable to unsigned and back).  */
7199   if (cand->pos == IP_ORIGINAL
7200       && cand->incremented_at == use->stmt)
7201     {
7202       enum tree_code stmt_code;
7203
7204       gcc_assert (is_gimple_assign (use->stmt));
7205       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7206
7207       /* Check whether we may leave the computation unchanged.
7208          This is the case only if it does not rely on other
7209          computations in the loop -- otherwise, the computation
7210          we rely upon may be removed in remove_unused_ivs,
7211          thus leading to ICE.  */
7212       stmt_code = gimple_assign_rhs_code (use->stmt);
7213       if (stmt_code == PLUS_EXPR
7214           || stmt_code == MINUS_EXPR
7215           || stmt_code == POINTER_PLUS_EXPR)
7216         {
7217           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7218             op = gimple_assign_rhs2 (use->stmt);
7219           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7220             op = gimple_assign_rhs1 (use->stmt);
7221           else
7222             op = NULL_TREE;
7223         }
7224       else
7225         op = NULL_TREE;
7226
7227       if (op && expr_invariant_in_loop_p (data->current_loop, op))
7228         return;
7229     }
7230
7231   comp = get_computation (data->current_loop, use, cand);
7232   gcc_assert (comp != NULL_TREE);
7233
7234   switch (gimple_code (use->stmt))
7235     {
7236     case GIMPLE_PHI:
7237       tgt = PHI_RESULT (use->stmt);
7238
7239       /* If we should keep the biv, do not replace it.  */
7240       if (name_info (data, tgt)->preserve_biv)
7241         return;
7242
7243       bsi = gsi_after_labels (gimple_bb (use->stmt));
7244       break;
7245
7246     case GIMPLE_ASSIGN:
7247       tgt = gimple_assign_lhs (use->stmt);
7248       bsi = gsi_for_stmt (use->stmt);
7249       break;
7250
7251     default:
7252       gcc_unreachable ();
7253     }
7254
7255   if (!valid_gimple_rhs_p (comp)
7256       || (gimple_code (use->stmt) != GIMPLE_PHI
7257           /* We can't allow re-allocating the stmt as it might be pointed
7258              to still.  */
7259           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7260               >= gimple_num_ops (gsi_stmt (bsi)))))
7261     {
7262       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7263                                        true, GSI_SAME_STMT);
7264       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7265         {
7266           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7267           /* As this isn't a plain copy we have to reset alignment
7268              information.  */
7269           if (SSA_NAME_PTR_INFO (comp))
7270             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7271         }
7272     }
7273
7274   if (gimple_code (use->stmt) == GIMPLE_PHI)
7275     {
7276       ass = gimple_build_assign (tgt, comp);
7277       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7278
7279       bsi = gsi_for_stmt (use->stmt);
7280       remove_phi_node (&bsi, false);
7281     }
7282   else
7283     {
7284       gimple_assign_set_rhs_from_tree (&bsi, comp);
7285       use->stmt = gsi_stmt (bsi);
7286     }
7287 }
7288
7289 /* Performs a peephole optimization to reorder the iv update statement with
7290    a mem ref to enable instruction combining in later phases. The mem ref uses
7291    the iv value before the update, so the reordering transformation requires
7292    adjustment of the offset. CAND is the selected IV_CAND.
7293
7294    Example:
7295
7296    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7297    iv2 = iv1 + 1;
7298
7299    if (t < val)      (1)
7300      goto L;
7301    goto Head;
7302
7303
7304    directly propagating t over to (1) will introduce overlapping live range
7305    thus increase register pressure. This peephole transform it into:
7306
7307
7308    iv2 = iv1 + 1;
7309    t = MEM_REF (base, iv2, 8, 8);
7310    if (t < val)
7311      goto L;
7312    goto Head;
7313 */
7314
7315 static void
7316 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7317 {
7318   tree var_after;
7319   gimple *iv_update, *stmt;
7320   basic_block bb;
7321   gimple_stmt_iterator gsi, gsi_iv;
7322
7323   if (cand->pos != IP_NORMAL)
7324     return;
7325
7326   var_after = cand->var_after;
7327   iv_update = SSA_NAME_DEF_STMT (var_after);
7328
7329   bb = gimple_bb (iv_update);
7330   gsi = gsi_last_nondebug_bb (bb);
7331   stmt = gsi_stmt (gsi);
7332
7333   /* Only handle conditional statement for now.  */
7334   if (gimple_code (stmt) != GIMPLE_COND)
7335     return;
7336
7337   gsi_prev_nondebug (&gsi);
7338   stmt = gsi_stmt (gsi);
7339   if (stmt != iv_update)
7340     return;
7341
7342   gsi_prev_nondebug (&gsi);
7343   if (gsi_end_p (gsi))
7344     return;
7345
7346   stmt = gsi_stmt (gsi);
7347   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7348     return;
7349
7350   if (stmt != use->stmt)
7351     return;
7352
7353   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7354     return;
7355
7356   if (dump_file && (dump_flags & TDF_DETAILS))
7357     {
7358       fprintf (dump_file, "Reordering \n");
7359       print_gimple_stmt (dump_file, iv_update, 0, 0);
7360       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7361       fprintf (dump_file, "\n");
7362     }
7363
7364   gsi = gsi_for_stmt (use->stmt);
7365   gsi_iv = gsi_for_stmt (iv_update);
7366   gsi_move_before (&gsi_iv, &gsi);
7367
7368   cand->pos = IP_BEFORE_USE;
7369   cand->incremented_at = use->stmt;
7370 }
7371
7372 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7373
7374 static void
7375 rewrite_use_address (struct ivopts_data *data,
7376                      struct iv_use *use, struct iv_cand *cand)
7377 {
7378   aff_tree aff;
7379   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7380   tree base_hint = NULL_TREE;
7381   tree ref, iv;
7382   bool ok;
7383
7384   adjust_iv_update_pos (cand, use);
7385   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7386   gcc_assert (ok);
7387   unshare_aff_combination (&aff);
7388
7389   /* To avoid undefined overflow problems, all IV candidates use unsigned
7390      integer types.  The drawback is that this makes it impossible for
7391      create_mem_ref to distinguish an IV that is based on a memory object
7392      from one that represents simply an offset.
7393
7394      To work around this problem, we pass a hint to create_mem_ref that
7395      indicates which variable (if any) in aff is an IV based on a memory
7396      object.  Note that we only consider the candidate.  If this is not
7397      based on an object, the base of the reference is in some subexpression
7398      of the use -- but these will use pointer types, so they are recognized
7399      by the create_mem_ref heuristics anyway.  */
7400   if (cand->iv->base_object)
7401     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7402
7403   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7404   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7405                         reference_alias_ptr_type (*use->op_p),
7406                         iv, base_hint, data->speed);
7407   copy_ref_info (ref, *use->op_p);
7408   *use->op_p = ref;
7409 }
7410
7411 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7412    candidate CAND.  */
7413
7414 static void
7415 rewrite_use_compare (struct ivopts_data *data,
7416                      struct iv_use *use, struct iv_cand *cand)
7417 {
7418   tree comp, *var_p, op, bound;
7419   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7420   enum tree_code compare;
7421   struct iv_group *group = data->vgroups[use->group_id];
7422   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7423   bool ok;
7424
7425   bound = cp->value;
7426   if (bound)
7427     {
7428       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7429       tree var_type = TREE_TYPE (var);
7430       gimple_seq stmts;
7431
7432       if (dump_file && (dump_flags & TDF_DETAILS))
7433         {
7434           fprintf (dump_file, "Replacing exit test: ");
7435           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7436         }
7437       compare = cp->comp;
7438       bound = unshare_expr (fold_convert (var_type, bound));
7439       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7440       if (stmts)
7441         gsi_insert_seq_on_edge_immediate (
7442                 loop_preheader_edge (data->current_loop),
7443                 stmts);
7444
7445       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7446       gimple_cond_set_lhs (cond_stmt, var);
7447       gimple_cond_set_code (cond_stmt, compare);
7448       gimple_cond_set_rhs (cond_stmt, op);
7449       return;
7450     }
7451
7452   /* The induction variable elimination failed; just express the original
7453      giv.  */
7454   comp = get_computation (data->current_loop, use, cand);
7455   gcc_assert (comp != NULL_TREE);
7456
7457   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7458   gcc_assert (ok);
7459
7460   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7461                                      true, GSI_SAME_STMT);
7462 }
7463
7464 /* Rewrite the groups using the selected induction variables.  */
7465
7466 static void
7467 rewrite_groups (struct ivopts_data *data)
7468 {
7469   unsigned i, j;
7470
7471   for (i = 0; i < data->vgroups.length (); i++)
7472     {
7473       struct iv_group *group = data->vgroups[i];
7474       struct iv_cand *cand = group->selected;
7475
7476       gcc_assert (cand);
7477
7478       if (group->type == USE_NONLINEAR_EXPR)
7479         {
7480           for (j = 0; j < group->vuses.length (); j++)
7481             {
7482               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7483               update_stmt (group->vuses[j]->stmt);
7484             }
7485         }
7486       else if (group->type == USE_ADDRESS)
7487         {
7488           for (j = 0; j < group->vuses.length (); j++)
7489             {
7490               rewrite_use_address (data, group->vuses[j], cand);
7491               update_stmt (group->vuses[j]->stmt);
7492             }
7493         }
7494       else
7495         {
7496           gcc_assert (group->type == USE_COMPARE);
7497
7498           for (j = 0; j < group->vuses.length (); j++)
7499             {
7500               rewrite_use_compare (data, group->vuses[j], cand);
7501               update_stmt (group->vuses[j]->stmt);
7502             }
7503         }
7504     }
7505 }
7506
7507 /* Removes the ivs that are not used after rewriting.  */
7508
7509 static void
7510 remove_unused_ivs (struct ivopts_data *data)
7511 {
7512   unsigned j;
7513   bitmap_iterator bi;
7514   bitmap toremove = BITMAP_ALLOC (NULL);
7515
7516   /* Figure out an order in which to release SSA DEFs so that we don't
7517      release something that we'd have to propagate into a debug stmt
7518      afterwards.  */
7519   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7520     {
7521       struct version_info *info;
7522
7523       info = ver_info (data, j);
7524       if (info->iv
7525           && !integer_zerop (info->iv->step)
7526           && !info->inv_id
7527           && !info->iv->nonlin_use
7528           && !info->preserve_biv)
7529         {
7530           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7531
7532           tree def = info->iv->ssa_name;
7533
7534           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7535             {
7536               imm_use_iterator imm_iter;
7537               use_operand_p use_p;
7538               gimple *stmt;
7539               int count = 0;
7540
7541               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7542                 {
7543                   if (!gimple_debug_bind_p (stmt))
7544                     continue;
7545
7546                   /* We just want to determine whether to do nothing
7547                      (count == 0), to substitute the computed
7548                      expression into a single use of the SSA DEF by
7549                      itself (count == 1), or to use a debug temp
7550                      because the SSA DEF is used multiple times or as
7551                      part of a larger expression (count > 1). */
7552                   count++;
7553                   if (gimple_debug_bind_get_value (stmt) != def)
7554                     count++;
7555
7556                   if (count > 1)
7557                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7558                 }
7559
7560               if (!count)
7561                 continue;
7562
7563               struct iv_use dummy_use;
7564               struct iv_cand *best_cand = NULL, *cand;
7565               unsigned i, best_pref = 0, cand_pref;
7566
7567               memset (&dummy_use, 0, sizeof (dummy_use));
7568               dummy_use.iv = info->iv;
7569               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7570                 {
7571                   cand = data->vgroups[i]->selected;
7572                   if (cand == best_cand)
7573                     continue;
7574                   cand_pref = operand_equal_p (cand->iv->step,
7575                                                info->iv->step, 0)
7576                     ? 4 : 0;
7577                   cand_pref
7578                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7579                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7580                     ? 2 : 0;
7581                   cand_pref
7582                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7583                     ? 1 : 0;
7584                   if (best_cand == NULL || best_pref < cand_pref)
7585                     {
7586                       best_cand = cand;
7587                       best_pref = cand_pref;
7588                     }
7589                 }
7590
7591               if (!best_cand)
7592                 continue;
7593
7594               tree comp = get_computation_at (data->current_loop,
7595                                               &dummy_use, best_cand,
7596                                               SSA_NAME_DEF_STMT (def));
7597               if (!comp)
7598                 continue;
7599
7600               if (count > 1)
7601                 {
7602                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7603                   DECL_ARTIFICIAL (vexpr) = 1;
7604                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7605                   if (SSA_NAME_VAR (def))
7606                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7607                   else
7608                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7609                   gdebug *def_temp
7610                     = gimple_build_debug_bind (vexpr, comp, NULL);
7611                   gimple_stmt_iterator gsi;
7612
7613                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7614                     gsi = gsi_after_labels (gimple_bb
7615                                             (SSA_NAME_DEF_STMT (def)));
7616                   else
7617                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7618
7619                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7620                   comp = vexpr;
7621                 }
7622
7623               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7624                 {
7625                   if (!gimple_debug_bind_p (stmt))
7626                     continue;
7627
7628                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7629                     SET_USE (use_p, comp);
7630
7631                   update_stmt (stmt);
7632                 }
7633             }
7634         }
7635     }
7636
7637   release_defs_bitset (toremove);
7638
7639   BITMAP_FREE (toremove);
7640 }
7641
7642 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7643    for hash_map::traverse.  */
7644
7645 bool
7646 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7647 {
7648   free (value);
7649   return true;
7650 }
7651
7652 /* Frees data allocated by the optimization of a single loop.  */
7653
7654 static void
7655 free_loop_data (struct ivopts_data *data)
7656 {
7657   unsigned i, j;
7658   bitmap_iterator bi;
7659   tree obj;
7660
7661   if (data->niters)
7662     {
7663       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7664       delete data->niters;
7665       data->niters = NULL;
7666     }
7667
7668   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7669     {
7670       struct version_info *info;
7671
7672       info = ver_info (data, i);
7673       info->iv = NULL;
7674       info->has_nonlin_use = false;
7675       info->preserve_biv = false;
7676       info->inv_id = 0;
7677     }
7678   bitmap_clear (data->relevant);
7679   bitmap_clear (data->important_candidates);
7680
7681   for (i = 0; i < data->vgroups.length (); i++)
7682     {
7683       struct iv_group *group = data->vgroups[i];
7684
7685       for (j = 0; j < group->vuses.length (); j++)
7686         free (group->vuses[j]);
7687       group->vuses.release ();
7688
7689       BITMAP_FREE (group->related_cands);
7690       for (j = 0; j < group->n_map_members; j++)
7691         if (group->cost_map[j].depends_on)
7692           BITMAP_FREE (group->cost_map[j].depends_on);
7693
7694       free (group->cost_map);
7695       free (group);
7696     }
7697   data->vgroups.truncate (0);
7698
7699   for (i = 0; i < data->vcands.length (); i++)
7700     {
7701       struct iv_cand *cand = data->vcands[i];
7702
7703       if (cand->depends_on)
7704         BITMAP_FREE (cand->depends_on);
7705       free (cand);
7706     }
7707   data->vcands.truncate (0);
7708
7709   if (data->version_info_size < num_ssa_names)
7710     {
7711       data->version_info_size = 2 * num_ssa_names;
7712       free (data->version_info);
7713       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7714     }
7715
7716   data->max_inv_id = 0;
7717
7718   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7719     SET_DECL_RTL (obj, NULL_RTX);
7720
7721   decl_rtl_to_reset.truncate (0);
7722
7723   data->inv_expr_tab->empty ();
7724   data->max_inv_expr_id = 0;
7725
7726   data->iv_common_cand_tab->empty ();
7727   data->iv_common_cands.truncate (0);
7728 }
7729
7730 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7731    loop tree.  */
7732
7733 static void
7734 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7735 {
7736   free_loop_data (data);
7737   free (data->version_info);
7738   BITMAP_FREE (data->relevant);
7739   BITMAP_FREE (data->important_candidates);
7740
7741   decl_rtl_to_reset.release ();
7742   data->vgroups.release ();
7743   data->vcands.release ();
7744   delete data->inv_expr_tab;
7745   data->inv_expr_tab = NULL;
7746   free_affine_expand_cache (&data->name_expansion_cache);
7747   delete data->iv_common_cand_tab;
7748   data->iv_common_cand_tab = NULL;
7749   data->iv_common_cands.release ();
7750   obstack_free (&data->iv_obstack, NULL);
7751 }
7752
7753 /* Returns true if the loop body BODY includes any function calls.  */
7754
7755 static bool
7756 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7757 {
7758   gimple_stmt_iterator gsi;
7759   unsigned i;
7760
7761   for (i = 0; i < num_nodes; i++)
7762     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7763       {
7764         gimple *stmt = gsi_stmt (gsi);
7765         if (is_gimple_call (stmt)
7766             && !gimple_call_internal_p (stmt)
7767             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7768           return true;
7769       }
7770   return false;
7771 }
7772
7773 /* Optimizes the LOOP.  Returns true if anything changed.  */
7774
7775 static bool
7776 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7777 {
7778   bool changed = false;
7779   struct iv_ca *iv_ca;
7780   edge exit = single_dom_exit (loop);
7781   basic_block *body;
7782
7783   gcc_assert (!data->niters);
7784   data->current_loop = loop;
7785   data->loop_loc = find_loop_location (loop);
7786   data->speed = optimize_loop_for_speed_p (loop);
7787
7788   if (dump_file && (dump_flags & TDF_DETAILS))
7789     {
7790       fprintf (dump_file, "Processing loop %d", loop->num);
7791       if (data->loop_loc != UNKNOWN_LOCATION)
7792         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7793                  LOCATION_LINE (data->loop_loc));
7794       fprintf (dump_file, "\n");
7795
7796       if (exit)
7797         {
7798           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7799                    exit->src->index, exit->dest->index);
7800           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7801           fprintf (dump_file, "\n");
7802         }
7803
7804       fprintf (dump_file, "\n");
7805     }
7806
7807   body = get_loop_body (loop);
7808   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7809   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7810   free (body);
7811
7812   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7813
7814   /* For each ssa name determines whether it behaves as an induction variable
7815      in some loop.  */
7816   if (!find_induction_variables (data))
7817     goto finish;
7818
7819   /* Finds interesting uses (item 1).  */
7820   find_interesting_uses (data);
7821   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7822     goto finish;
7823
7824   /* Finds candidates for the induction variables (item 2).  */
7825   find_iv_candidates (data);
7826
7827   /* Calculates the costs (item 3, part 1).  */
7828   determine_iv_costs (data);
7829   determine_group_iv_costs (data);
7830   determine_set_costs (data);
7831
7832   /* Find the optimal set of induction variables (item 3, part 2).  */
7833   iv_ca = find_optimal_iv_set (data);
7834   if (!iv_ca)
7835     goto finish;
7836   changed = true;
7837
7838   /* Create the new induction variables (item 4, part 1).  */
7839   create_new_ivs (data, iv_ca);
7840   iv_ca_free (&iv_ca);
7841
7842   /* Rewrite the uses (item 4, part 2).  */
7843   rewrite_groups (data);
7844
7845   /* Remove the ivs that are unused after rewriting.  */
7846   remove_unused_ivs (data);
7847
7848   /* We have changed the structure of induction variables; it might happen
7849      that definitions in the scev database refer to some of them that were
7850      eliminated.  */
7851   scev_reset ();
7852
7853 finish:
7854   free_loop_data (data);
7855
7856   return changed;
7857 }
7858
7859 /* Main entry point.  Optimizes induction variables in loops.  */
7860
7861 void
7862 tree_ssa_iv_optimize (void)
7863 {
7864   struct loop *loop;
7865   struct ivopts_data data;
7866
7867   tree_ssa_iv_optimize_init (&data);
7868
7869   /* Optimize the loops starting with the innermost ones.  */
7870   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7871     {
7872       if (dump_file && (dump_flags & TDF_DETAILS))
7873         flow_loop_dump (loop, dump_file, NULL, 1);
7874
7875       tree_ssa_iv_optimize_loop (&data, loop);
7876     }
7877
7878   tree_ssa_iv_optimize_finalize (&data);
7879 }