gcc/tree-ssa-loop-ivopts.cc
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination.  It proceeds in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop.  Doing it globally is theoretically
65 possible; it might give better performance and enable us to decide costs
66 more precisely, but getting all the interactions right would be
67 complicated.
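   As an illustrative sketch (not part of the pass itself), a loop such as

	for (i = 0; i < 100; i++)
	  a[i] = 0;

   may end up keeping only a pointer induction variable, conceptually

	for (p = &a[0]; p != &a[100]; p++)
	  *p = 0;

   which is the classical strength-reduction / iv-elimination result the
   steps above aim for.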
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop
71 optimization, and try to make the selected IV candidate set optimal for
72 them.  The process of doloop support includes:
74 1) Analyze whether the current loop will be transformed into a doloop; if
75 so, mark its compare type IV use as a doloop use (iv_group field doloop_p)
76 and set flag doloop_use_p of ivopts_data to notify subsequent processing of
77 the doloop.  See analyze_and_mark_doloop_use and its callees for details.
78 The target hook predict_doloop_p can be used for target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 set flag doloop_p of the iv_cand; its step cost is set to zero and, like a
82 biv, it has no extra cost.  For cost determination between doloop IV cand and IV use, the
83 target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 provided to add on extra costs for generic type and address type IV use.
85 Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will proceed as usual and pick the doloop dedicated IV
90 if it is profitable.  */
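/* Illustrative note (not part of the pass): for a counted loop whose niter is
   known and whose may_be_zero condition is false, the dedicated doloop
   candidate described above behaves like a down-counting IV starting at
   niter + 1 with step -1, so the exit test can later be matched by the
   target's low-overhead loop (doloop) pattern in RTL.  */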
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
143 /* The infinite cost. */
144 #define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
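/* Note on avg_loop_niter above (illustrative): if profile data estimates a
   loop at ~1000 iterations while --param avg-loop-niter is 10 (a common
   default), the returned value, and hence the iteration scaling used by the
   cost model below, is capped at 10.  */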
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to which the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use of this iv, if there is one. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use; /* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
192 /* Types of uses. */
193 enum use_type
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts cost COST2 from COST1. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
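/* Example of the ordering defined above (descriptive comment): the cost
   {cost = 4, complexity = 1} compares as cheaper than {cost = 4, complexity = 2},
   since the runtime costs tie and the lower complexity wins, while
   {cost = 3, complexity = 7} is cheaper than {cost = 4, complexity = 0}
   because the runtime cost is compared first.  */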
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64_pod addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
430 /* The costs w.r.t. the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
440 /* The position where the iv is computed. */
441 enum iv_position
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique identifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variables and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
703 /* Difference of two iv candidate assignments. */
705 struct iv_ca_delta
707 /* Changed group. */
708 struct iv_group *group;
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
713 /* A new assignment. */
714 class cost_pair *new_cp;
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
720 /* Bound on the number of candidates below which all candidates are considered. */
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
728 #define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
731 /* If there are at most this number of ivs in the set, always try removing
732 unnecessary ivs from the set. */
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for which the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness w.r.t. loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_nondebug_stmt (bb);
943 /* Returns true if STMT is after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT is after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 if (!number_of_iterations_exit (data->current_loop,
1040 exit, desc, true)
1041 || contains_abnormal_ssa_name_p (desc->niter))
1043 XDELETE (desc);
1044 desc = NULL;
1046 data->niters->put (exit, desc);
1048 else
1049 desc = *slot;
1051 return desc;
1054 /* Returns the structure describing number of iterations determined from
1055 single dominating exit of DATA->current_loop, or NULL if something
1056 goes wrong. */
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1061 edge exit = single_dom_exit (data->current_loop);
1063 if (!exit)
1064 return NULL;
1066 return niter_for_exit (data, exit);
1069 /* Initializes data structures used by the iv optimization pass, stored
1070 in DATA. */
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1075 data->version_info_size = 2 * num_ssa_names;
1076 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077 data->relevant = BITMAP_ALLOC (NULL);
1078 data->important_candidates = BITMAP_ALLOC (NULL);
1079 data->max_inv_var_id = 0;
1080 data->max_inv_expr_id = 0;
1081 data->niters = NULL;
1082 data->vgroups.create (20);
1083 data->vcands.create (20);
1084 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085 data->name_expansion_cache = NULL;
1086 data->base_object_map = NULL;
1087 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088 data->iv_common_cands.create (20);
1089 decl_rtl_to_reset.create (20);
1090 gcc_obstack_init (&data->iv_obstack);
1093 /* walk_tree callback for determine_base_object. */
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1098 tree_code code = TREE_CODE (*tp);
1099 tree obj = NULL_TREE;
1100 if (code == ADDR_EXPR)
1102 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103 if (!base)
1104 obj = *tp;
1105 else if (TREE_CODE (base) != MEM_REF)
1106 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1108 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109 obj = fold_convert (ptr_type_node, *tp);
1111 if (!obj)
1113 if (!EXPR_P (*tp))
1114 *walk_subtrees = 0;
1116 return NULL_TREE;
1118 /* Record special node for multiple base objects and stop. */
1119 if (*static_cast<tree *> (wdata))
1121 *static_cast<tree *> (wdata) = integer_zero_node;
1122 return integer_zero_node;
1124 /* Record the base object and continue looking. */
1125 *static_cast<tree *> (wdata) = obj;
1126 return NULL_TREE;
1129 /* Returns a memory object to which EXPR points, with caching.  Return NULL
1130 if we are able to determine that it does not point to any such object;
1131 specifically, return integer_zero_node if EXPR contains multiple base objects. */
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1136 tree *slot, obj = NULL_TREE;
1137 if (data->base_object_map)
1139 if ((slot = data->base_object_map->get(expr)) != NULL)
1140 return *slot;
1142 else
1143 data->base_object_map = new hash_map<tree, tree>;
1145 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146 data->base_object_map->put (expr, obj);
1147 return obj;
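/* Examples of the mapping above (descriptive comment): for EXPR '&a[i]' the
   recorded base object is the address of 'a' converted to ptr_type_node; for
   an SSA pointer 'p_3' it is 'p_3' itself; if EXPR mixes two different base
   objects, integer_zero_node is recorded instead.  */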
1150 /* Return true if an address expression with a non-DECL_P operand appears
1151 in EXPR. */
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1156 bool res = false;
1158 STRIP_NOPS (expr);
1159 switch (TREE_CODE (expr))
1161 case POINTER_PLUS_EXPR:
1162 case PLUS_EXPR:
1163 case MINUS_EXPR:
1164 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166 break;
1168 case ADDR_EXPR:
1169 return (!DECL_P (TREE_OPERAND (expr, 0)));
1171 default:
1172 return false;
1175 return res;
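/* For instance (descriptive comment), '&a' has a DECL_P operand and is not
   considered complex, while '&a[4]' or '&s.f + off' contain an address
   expression whose operand is not a DECL and therefore are; such bases are
   lowered through the affine machinery in alloc_iv below so that equivalent
   forms share one representation.  */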
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183 bool no_overflow = false)
1185 tree expr = base;
1186 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187 sizeof (struct iv));
1188 gcc_assert (step != NULL_TREE);
1190 /* Lower address expression in base except ones with DECL_P as operand.
1191 By doing this:
1192 1) More accurate cost can be computed for address expressions;
1193 2) Duplicate candidates won't be created for bases in different
1194 forms, like &a[0] and &a. */
1195 STRIP_NOPS (expr);
1196 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197 || contain_complex_addr_expr (expr))
1199 aff_tree comb;
1200 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204 iv->base = base;
1205 iv->base_object = determine_base_object (data, base);
1206 iv->step = step;
1207 iv->biv_p = false;
1208 iv->nonlin_use = NULL;
1209 iv->ssa_name = NULL_TREE;
1210 if (!no_overflow
1211 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212 base, step))
1213 no_overflow = true;
1214 iv->no_overflow = no_overflow;
1215 iv->have_address_use = false;
1217 return iv;
1220 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1221 doesn't overflow. */
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225 bool no_overflow)
1227 struct version_info *info = name_info (data, iv);
1229 gcc_assert (!info->iv);
1231 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232 info->iv = alloc_iv (data, base, step, no_overflow);
1233 info->iv->ssa_name = iv;
1236 /* Finds induction variable declaration for VAR. */
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1241 basic_block bb;
1242 tree type = TREE_TYPE (var);
1244 if (!POINTER_TYPE_P (type)
1245 && !INTEGRAL_TYPE_P (type))
1246 return NULL;
1248 if (!name_info (data, var)->iv)
1250 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1252 if (!bb
1253 || !flow_bb_inside_loop_p (data->current_loop, bb))
1255 if (POINTER_TYPE_P (type))
1256 type = sizetype;
1257 set_iv (data, var, var, build_int_cst (type, 0), true);
1261 return name_info (data, var)->iv;
1264 /* Return the first non-invariant ssa var found in EXPR. */
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1269 int i, n;
1270 tree tmp;
1271 enum tree_code code;
1273 if (!expr || is_gimple_min_invariant (expr))
1274 return NULL;
1276 code = TREE_CODE (expr);
1277 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1279 n = TREE_OPERAND_LENGTH (expr);
1280 for (i = 0; i < n; i++)
1282 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1284 if (tmp)
1285 return tmp;
1288 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291 /* Finds basic ivs. */
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1296 gphi *phi;
1297 affine_iv iv;
1298 tree step, type, base, stop;
1299 bool found = false;
1300 class loop *loop = data->current_loop;
1301 gphi_iterator psi;
1303 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1305 phi = psi.phi ();
1307 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308 continue;
1310 if (virtual_operand_p (PHI_RESULT (phi)))
1311 continue;
1313 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314 continue;
1316 if (integer_zerop (iv.step))
1317 continue;
1319 step = iv.step;
1320 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321 /* Stop expanding the iv base at the first ssa var referred to by the iv
1322 step.  Ideally we should stop at any ssa var, but since that is expensive
1323 and such cases are unusual, we only do it for the first one.
1325 See PR64705 for the rationale. */
1326 stop = extract_single_var_from_expr (step);
1327 base = expand_simple_operations (base, stop);
1328 if (contains_abnormal_ssa_name_p (base)
1329 || contains_abnormal_ssa_name_p (step))
1330 continue;
1332 type = TREE_TYPE (PHI_RESULT (phi));
1333 base = fold_convert (type, base);
1334 if (step)
1336 if (POINTER_TYPE_P (type))
1337 step = convert_to_ptrofftype (step);
1338 else
1339 step = fold_convert (type, step);
1342 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343 found = true;
1346 return found;
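/* As an example of what find_bivs records (descriptive comment): for a loop
   header PHI i_1 = PHI <0(preheader), i_7(latch)> with i_7 = i_1 + 1 in the
   body, the biv for i_1 gets base 0 and step 1 in the type of the PHI
   result.  */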
1349 /* Marks basic ivs. */
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1354 gphi *phi;
1355 gimple *def;
1356 tree var;
1357 struct iv *iv, *incr_iv;
1358 class loop *loop = data->current_loop;
1359 basic_block incr_bb;
1360 gphi_iterator psi;
1362 data->bivs_not_used_in_addr = 0;
1363 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1365 phi = psi.phi ();
1367 iv = get_iv (data, PHI_RESULT (phi));
1368 if (!iv)
1369 continue;
1371 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372 def = SSA_NAME_DEF_STMT (var);
1373 /* Don't mark an iv peeled from another one as a biv. */
1374 if (def
1375 && gimple_code (def) == GIMPLE_PHI
1376 && gimple_bb (def) == loop->header)
1377 continue;
1379 incr_iv = get_iv (data, var);
1380 if (!incr_iv)
1381 continue;
1383 /* If the increment is in the subloop, ignore it. */
1384 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385 if (incr_bb->loop_father != data->current_loop
1386 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387 continue;
1389 iv->biv_p = true;
1390 incr_iv->biv_p = true;
1391 if (iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1393 if (incr_iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399 parameters to IV. */
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1404 tree lhs, stop;
1405 class loop *loop = data->current_loop;
1407 iv->base = NULL_TREE;
1408 iv->step = NULL_TREE;
1410 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411 return false;
1413 lhs = gimple_assign_lhs (stmt);
1414 if (TREE_CODE (lhs) != SSA_NAME)
1415 return false;
1417 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418 return false;
1420 /* Stop expanding the iv base at the first ssa var referred to by the iv
1421 step.  Ideally we should stop at any ssa var, but since that is expensive
1422 and such cases are unusual, we only do it for the first one.
1424 See PR64705 for the rationale. */
1425 stop = extract_single_var_from_expr (iv->step);
1426 iv->base = expand_simple_operations (iv->base, stop);
1427 if (contains_abnormal_ssa_name_p (iv->base)
1428 || contains_abnormal_ssa_name_p (iv->step))
1429 return false;
1431 /* If STMT could throw, then do not consider STMT as defining a GIV.
1432 While this will suppress optimizations, we cannot safely delete this
1433 GIV and associated statements, even if it appears it is not used. */
1434 if (stmt_could_throw_p (cfun, stmt))
1435 return false;
1437 return true;
1440 /* Finds general ivs in statement STMT. */
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1445 affine_iv iv;
1447 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448 return;
1450 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1453 /* Finds general ivs in basic block BB. */
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1458 gimple_stmt_iterator bsi;
1460 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461 find_givs_in_stmt (data, gsi_stmt (bsi));
1464 /* Finds general ivs. */
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1469 class loop *loop = data->current_loop;
1470 unsigned i;
1472 for (i = 0; i < loop->num_nodes; i++)
1473 find_givs_in_bb (data, body[i]);
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477 variable and if so, its initial value and step. */
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1482 unsigned i;
1483 bitmap_iterator bi;
1485 if (!find_bivs (data))
1486 return false;
1488 find_givs (data, body);
1489 mark_bivs (data);
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1493 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495 if (niter)
1497 fprintf (dump_file, " number of iterations ");
1498 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 if (!integer_zerop (niter->may_be_zero))
1501 fprintf (dump_file, "; zero if ");
1502 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 fprintf (dump_file, "\n");
1507 fprintf (dump_file, "\n<Induction Vars>:\n");
1508 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 struct version_info *info = ver_info (data, i);
1511 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1516 return true;
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521 is the const offset stripped from IV base and MEM_TYPE is the type
1522 of the memory being addressed. For uses of other types, ADDR_BASE
1523 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 gimple *stmt, enum use_type type, tree mem_type,
1528 tree addr_base, poly_uint64 addr_offset)
1530 struct iv_use *use = XCNEW (struct iv_use);
1532 use->id = group->vuses.length ();
1533 use->group_id = group->id;
1534 use->type = type;
1535 use->mem_type = mem_type;
1536 use->iv = iv;
1537 use->stmt = stmt;
1538 use->op_p = use_p;
1539 use->addr_base = addr_base;
1540 use->addr_offset = addr_offset;
1542 group->vuses.safe_push (use);
1543 return use;
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547 NONLINEAR_USE is true if the invariant is used in a way we do not
1548 handle specially. */
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 basic_block bb;
1554 struct version_info *info;
1556 if (TREE_CODE (op) != SSA_NAME
1557 || virtual_operand_p (op))
1558 return;
1560 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561 if (bb
1562 && flow_bb_inside_loop_p (data->current_loop, bb))
1563 return;
1565 info = name_info (data, op);
1566 info->name = op;
1567 info->has_nonlin_use |= nonlinear_use;
1568 if (!info->inv_id)
1569 info->inv_id = ++data->max_inv_var_id;
1570 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573 /* Record a group of TYPE. */
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1578 struct iv_group *group = XCNEW (struct iv_group);
1580 group->id = data->vgroups.length ();
1581 group->type = type;
1582 group->related_cands = BITMAP_ALLOC (NULL);
1583 group->vuses.create (1);
1584 group->doloop_p = false;
1586 data->vgroups.safe_push (group);
1587 return group;
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591 New group will be created if there is no existing group for the use.
1592 MEM_TYPE is the type of memory being addressed, or NULL if this
1593 isn't an address reference. */
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 struct iv *iv, gimple *stmt, enum use_type type,
1598 tree mem_type)
1600 tree addr_base = NULL;
1601 struct iv_group *group = NULL;
1602 poly_uint64 addr_offset = 0;
1604 /* Non-address type uses always get a new group; address type uses may join an existing group below. */
1605 if (address_p (type))
1607 unsigned int i;
1609 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1610 tree addr_toffset;
1611 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1612 addr_offset = int_cst_value (addr_toffset);
1613 for (i = 0; i < data->vgroups.length (); i++)
1615 struct iv_use *use;
1617 group = data->vgroups[i];
1618 use = group->vuses[0];
1619 if (!address_p (use->type))
1620 continue;
1622 /* Check if it has the same stripped base and step. */
1623 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1624 && operand_equal_p (iv->step, use->iv->step, 0)
1625 && operand_equal_p (addr_base, use->addr_base, 0))
1626 break;
1628 if (i == data->vgroups.length ())
1629 group = NULL;
1632 if (!group)
1633 group = record_group (data, type);
1635 return record_use (group, use_p, iv, stmt, type, mem_type,
1636 addr_base, addr_offset);
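/* For example (descriptive comment), address uses 'a[i]' and 'a[i + 1]' in
   the same loop share the base object, the step and the stripped base
   address, so they end up in one group and differ only in addr_offset.  */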
1639 /* Checks whether the use OP is interesting and if so, records it. */
1641 static struct iv_use *
1642 find_interesting_uses_op (struct ivopts_data *data, tree op)
1644 struct iv *iv;
1645 gimple *stmt;
1646 struct iv_use *use;
1648 if (TREE_CODE (op) != SSA_NAME)
1649 return NULL;
1651 iv = get_iv (data, op);
1652 if (!iv)
1653 return NULL;
1655 if (iv->nonlin_use)
1657 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1658 return iv->nonlin_use;
1661 if (integer_zerop (iv->step))
1663 record_invariant (data, op, true);
1664 return NULL;
1667 stmt = SSA_NAME_DEF_STMT (op);
1668 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1670 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1671 iv->nonlin_use = use;
1672 return use;
1675 /* Indicate how compare type iv_use can be handled. */
1676 enum comp_iv_rewrite
1678 COMP_IV_NA,
1679 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1680 COMP_IV_EXPR,
1681 /* We may rewrite compare type iv_uses on both sides of comparison by
1682 expressing value of each iv_use. */
1683 COMP_IV_EXPR_2,
1684 /* We may rewrite compare type iv_use by expressing value of the iv_use
1685 or by eliminating it with other iv_cand. */
1686 COMP_IV_ELIM
1689 /* Given a condition in statement STMT, checks whether it is a compare of
1690 an induction variable and an invariant.  If this is the case, CONTROL_VAR
1691 is set to the location of the iv, BOUND to the location of the invariant,
1692 IV_VAR and IV_BOUND are set to the corresponding induction variable
1693 descriptions, and a comp_iv_rewrite value describing how the comparison
1694 can be rewritten is returned.  If this is not the case, CONTROL_VAR and
1695 BOUND are set to the arguments of the condition and COMP_IV_NA is returned. */
1697 static enum comp_iv_rewrite
1698 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1699 tree **control_var, tree **bound,
1700 struct iv **iv_var, struct iv **iv_bound)
1702 /* The objects returned when COND has constant operands. */
1703 static struct iv const_iv;
1704 static tree zero;
1705 tree *op0 = &zero, *op1 = &zero;
1706 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1707 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1709 if (gimple_code (stmt) == GIMPLE_COND)
1711 gcond *cond_stmt = as_a <gcond *> (stmt);
1712 op0 = gimple_cond_lhs_ptr (cond_stmt);
1713 op1 = gimple_cond_rhs_ptr (cond_stmt);
1715 else
1717 op0 = gimple_assign_rhs1_ptr (stmt);
1718 op1 = gimple_assign_rhs2_ptr (stmt);
1721 zero = integer_zero_node;
1722 const_iv.step = integer_zero_node;
1724 if (TREE_CODE (*op0) == SSA_NAME)
1725 iv0 = get_iv (data, *op0);
1726 if (TREE_CODE (*op1) == SSA_NAME)
1727 iv1 = get_iv (data, *op1);
1729 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1730 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1732 rewrite_type = COMP_IV_EXPR_2;
1733 goto end;
1736 /* If neither side of the comparison is an IV. */
1737 if ((!iv0 || integer_zerop (iv0->step))
1738 && (!iv1 || integer_zerop (iv1->step)))
1739 goto end;
1741 /* Control variable may be on the other side. */
1742 if (!iv0 || integer_zerop (iv0->step))
1744 std::swap (op0, op1);
1745 std::swap (iv0, iv1);
1747 /* If one side is IV and the other side isn't loop invariant. */
1748 if (!iv1)
1749 rewrite_type = COMP_IV_EXPR;
1750 /* If one side is IV and the other side is loop invariant. */
1751 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1752 rewrite_type = COMP_IV_ELIM;
1754 end:
1755 if (control_var)
1756 *control_var = op0;
1757 if (iv_var)
1758 *iv_var = iv0;
1759 if (bound)
1760 *bound = op1;
1761 if (iv_bound)
1762 *iv_bound = iv1;
1764 return rewrite_type;
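/* Examples of the classification above (descriptive comment): for 'i < n'
   where i is an IV and n is loop invariant the result is COMP_IV_ELIM; for
   'i < j' where both sides are IVs it is COMP_IV_EXPR_2; for 'i < x' where x
   is defined inside the loop but is not an IV it is COMP_IV_EXPR; if neither
   side is an IV the result is COMP_IV_NA.  */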
1767 /* Checks whether the condition in STMT is interesting and if so,
1768 records it. */
1770 static void
1771 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1773 tree *var_p, *bound_p;
1774 struct iv *var_iv, *bound_iv;
1775 enum comp_iv_rewrite ret;
1777 ret = extract_cond_operands (data, stmt,
1778 &var_p, &bound_p, &var_iv, &bound_iv);
1779 if (ret == COMP_IV_NA)
1781 find_interesting_uses_op (data, *var_p);
1782 find_interesting_uses_op (data, *bound_p);
1783 return;
1786 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1787 /* Record compare type iv_use for iv on the other side of comparison. */
1788 if (ret == COMP_IV_EXPR_2)
1789 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1792 /* Returns the outermost loop in which EXPR is obviously invariant relative
1793 to the loop LOOP, i.e. the outermost loop such that all of EXPR's operands
1794 are defined outside of the returned loop.  Returns NULL if EXPR is not
1795 even obviously invariant in LOOP. */
1797 class loop *
1798 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1800 basic_block def_bb;
1801 unsigned i, len;
1803 if (is_gimple_min_invariant (expr))
1804 return current_loops->tree_root;
1806 if (TREE_CODE (expr) == SSA_NAME)
1808 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1809 if (def_bb)
1811 if (flow_bb_inside_loop_p (loop, def_bb))
1812 return NULL;
1813 return superloop_at_depth (loop,
1814 loop_depth (def_bb->loop_father) + 1);
1817 return current_loops->tree_root;
1820 if (!EXPR_P (expr))
1821 return NULL;
1823 unsigned maxdepth = 0;
1824 len = TREE_OPERAND_LENGTH (expr);
1825 for (i = 0; i < len; i++)
1827 class loop *ivloop;
1828 if (!TREE_OPERAND (expr, i))
1829 continue;
1831 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1832 if (!ivloop)
1833 return NULL;
1834 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837 return superloop_at_depth (loop, maxdepth);
1840 /* Returns true if expression EXPR is obviously invariant in LOOP,
1841 i.e. if all its operands are defined outside of the LOOP. LOOP
1842 should not be the function body. */
1844 bool
1845 expr_invariant_in_loop_p (class loop *loop, tree expr)
1847 basic_block def_bb;
1848 unsigned i, len;
1850 gcc_assert (loop_depth (loop) > 0);
1852 if (is_gimple_min_invariant (expr))
1853 return true;
1855 if (TREE_CODE (expr) == SSA_NAME)
1857 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1858 if (def_bb
1859 && flow_bb_inside_loop_p (loop, def_bb))
1860 return false;
1862 return true;
1865 if (!EXPR_P (expr))
1866 return false;
1868 len = TREE_OPERAND_LENGTH (expr);
1869 for (i = 0; i < len; i++)
1870 if (TREE_OPERAND (expr, i)
1871 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1872 return false;
1874 return true;
1877 /* Given expression EXPR which computes inductive values with respect
1878 to the loop recorded in DATA, this function returns the biv from which EXPR
1879 is derived, by tracing the definition chains of ssa variables in EXPR. */
1881 static struct iv*
1882 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1884 struct iv *iv;
1885 unsigned i, n;
1886 tree e2, e1;
1887 enum tree_code code;
1888 gimple *stmt;
1890 if (expr == NULL_TREE)
1891 return NULL;
1893 if (is_gimple_min_invariant (expr))
1894 return NULL;
1896 code = TREE_CODE (expr);
1897 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1899 n = TREE_OPERAND_LENGTH (expr);
1900 for (i = 0; i < n; i++)
1902 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1903 if (iv)
1904 return iv;
1908 /* Stop if it's not ssa name. */
1909 if (code != SSA_NAME)
1910 return NULL;
1912 iv = get_iv (data, expr);
1913 if (!iv || integer_zerop (iv->step))
1914 return NULL;
1915 else if (iv->biv_p)
1916 return iv;
1918 stmt = SSA_NAME_DEF_STMT (expr);
1919 if (gphi *phi = dyn_cast <gphi *> (stmt))
1921 ssa_op_iter iter;
1922 use_operand_p use_p;
1923 basic_block phi_bb = gimple_bb (phi);
1925 /* Skip loop header PHI that doesn't define biv. */
1926 if (phi_bb->loop_father == data->current_loop)
1927 return NULL;
1929 if (virtual_operand_p (gimple_phi_result (phi)))
1930 return NULL;
1932 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1934 tree use = USE_FROM_PTR (use_p);
1935 iv = find_deriving_biv_for_expr (data, use);
1936 if (iv)
1937 return iv;
1939 return NULL;
1941 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1942 return NULL;
1944 e1 = gimple_assign_rhs1 (stmt);
1945 code = gimple_assign_rhs_code (stmt);
1946 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1947 return find_deriving_biv_for_expr (data, e1);
1949 switch (code)
1951 case MULT_EXPR:
1952 case PLUS_EXPR:
1953 case MINUS_EXPR:
1954 case POINTER_PLUS_EXPR:
1955 /* Increments, decrements and multiplications by a constant
1956 are simple. */
1957 e2 = gimple_assign_rhs2 (stmt);
1958 iv = find_deriving_biv_for_expr (data, e2);
1959 if (iv)
1960 return iv;
1961 gcc_fallthrough ();
1963 CASE_CONVERT:
1964 /* Casts are simple. */
1965 return find_deriving_biv_for_expr (data, e1);
1967 default:
1968 break;
1971 return NULL;
1974 /* Record that BIV and its predecessor and successor ivs are used in
1975 address type uses. */
1977 static void
1978 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1980 unsigned i;
1981 tree type, base_1, base_2;
1982 bitmap_iterator bi;
1984 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1985 || biv->have_address_use || !biv->no_overflow)
1986 return;
1988 type = TREE_TYPE (biv->base);
1989 if (!INTEGRAL_TYPE_P (type))
1990 return;
1992 biv->have_address_use = true;
1993 data->bivs_not_used_in_addr--;
1994 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1995 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1997 struct iv *iv = ver_info (data, i)->iv;
1999 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2000 || iv->have_address_use || !iv->no_overflow)
2001 continue;
2003 if (type != TREE_TYPE (iv->base)
2004 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2005 continue;
2007 if (!operand_equal_p (biv->step, iv->step, 0))
2008 continue;
2010 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2011 if (operand_equal_p (base_1, iv->base, 0)
2012 || operand_equal_p (base_2, biv->base, 0))
2014 iv->have_address_use = true;
2015 data->bivs_not_used_in_addr--;
2020 /* Accumulates the steps of indices into DATA and replaces their values with the
2021 initial ones. Returns false when the value of the index cannot be determined.
2022 Callback for for_each_index. */
2024 struct ifs_ivopts_data
2026 struct ivopts_data *ivopts_data;
2027 gimple *stmt;
2028 tree step;
2031 static bool
2032 idx_find_step (tree base, tree *idx, void *data)
2034 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2035 struct iv *iv;
2036 bool use_overflow_semantics = false;
2037 tree step, iv_base, iv_step, lbound, off;
2038 class loop *loop = dta->ivopts_data->current_loop;
2040 /* If base is a component ref, require that the offset of the reference
2041 be invariant. */
2042 if (TREE_CODE (base) == COMPONENT_REF)
2044 off = component_ref_field_offset (base);
2045 return expr_invariant_in_loop_p (loop, off);
2048 /* If base is array, first check whether we will be able to move the
2049 reference out of the loop (in order to take its address in strength
2050 reduction). In order for this to work we need both lower bound
2051 and step to be loop invariants. */
2052 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054 /* Moreover, for a range, the size needs to be invariant as well. */
2055 if (TREE_CODE (base) == ARRAY_RANGE_REF
2056 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2057 return false;
2059 step = array_ref_element_size (base);
2060 lbound = array_ref_low_bound (base);
2062 if (!expr_invariant_in_loop_p (loop, step)
2063 || !expr_invariant_in_loop_p (loop, lbound))
2064 return false;
2067 if (TREE_CODE (*idx) != SSA_NAME)
2068 return true;
2070 iv = get_iv (dta->ivopts_data, *idx);
2071 if (!iv)
2072 return false;
2074 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2075 *&x[0], which is not folded and does not trigger the
2076 ARRAY_REF path below. */
2077 *idx = iv->base;
2079 if (integer_zerop (iv->step))
2080 return true;
2082 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2084 step = array_ref_element_size (base);
2086 /* We only handle addresses whose step is an integer constant. */
2087 if (TREE_CODE (step) != INTEGER_CST)
2088 return false;
2090 else
2091 /* The step for pointer arithmetic is already 1 byte. */
2092 step = size_one_node;
2094 iv_base = iv->base;
2095 iv_step = iv->step;
2096 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2097 use_overflow_semantics = true;
2099 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2100 sizetype, &iv_base, &iv_step, dta->stmt,
2101 use_overflow_semantics))
2103 /* The index might wrap. */
2104 return false;
2107 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2108 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2110 if (dta->ivopts_data->bivs_not_used_in_addr)
2112 if (!iv->biv_p)
2113 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2115 record_biv_for_address_use (dta->ivopts_data, iv);
2117 return true;
2120 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2121 object is passed to it in DATA. */
2123 static bool
2124 idx_record_use (tree base, tree *idx,
2125 void *vdata)
2127 struct ivopts_data *data = (struct ivopts_data *) vdata;
2128 find_interesting_uses_op (data, *idx);
2129 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2131 if (TREE_OPERAND (base, 2))
2132 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2133 if (TREE_OPERAND (base, 3))
2134 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2136 return true;
2139 /* If we can prove that TOP = cst * BOT for some constant cst,
2140 store cst to MUL and return true. Otherwise return false.
2141 The returned value is always sign-extended, regardless of the
2142 signedness of TOP and BOT. */
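/* Illustrative examples (not exhaustive): with TOP = i * 12 and BOT = i,
   the MULT_EXPR case recurses on (i, i), which yields 1, so MUL becomes 12.
   With TOP = 24 and BOT = 8, the INTEGER_CST case divides with a zero
   remainder and MUL becomes 3.  With TOP = i * 4 - i and BOT = i, the
   MINUS_EXPR case yields 4 - 1 = 3.  */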
2144 static bool
2145 constant_multiple_of (tree top, tree bot, widest_int *mul)
2147 tree mby;
2148 enum tree_code code;
2149 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2150 widest_int res, p0, p1;
2152 STRIP_NOPS (top);
2153 STRIP_NOPS (bot);
2155 if (operand_equal_p (top, bot, 0))
2157 *mul = 1;
2158 return true;
2161 code = TREE_CODE (top);
2162 switch (code)
2164 case MULT_EXPR:
2165 mby = TREE_OPERAND (top, 1);
2166 if (TREE_CODE (mby) != INTEGER_CST)
2167 return false;
2169 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2170 return false;
2172 *mul = wi::sext (res * wi::to_widest (mby), precision);
2173 return true;
2175 case PLUS_EXPR:
2176 case MINUS_EXPR:
2177 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2178 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2179 return false;
2181 if (code == MINUS_EXPR)
2182 p1 = -p1;
2183 *mul = wi::sext (p0 + p1, precision);
2184 return true;
2186 case INTEGER_CST:
2187 if (TREE_CODE (bot) != INTEGER_CST)
2188 return false;
2190 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2191 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2192 if (p1 == 0)
2193 return false;
2194 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2195 return res == 0;
2197 default:
2198 if (POLY_INT_CST_P (top)
2199 && POLY_INT_CST_P (bot)
2200 && constant_multiple_p (wi::to_poly_widest (top),
2201 wi::to_poly_widest (bot), mul))
2202 return true;
2204 return false;
2208 /* Return true if memory reference REF with step STEP may be unaligned. */
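/* For instance (a rough, target-dependent sketch): on a strict-alignment
   target, a 4-byte access whose address advances by 2 bytes per iteration
   may become misaligned even if the first access is aligned, which is what
   the check on the trailing zero bits of STEP below catches.  */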
2210 static bool
2211 may_be_unaligned_p (tree ref, tree step)
2213 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2214 thus they are not misaligned. */
2215 if (TREE_CODE (ref) == TARGET_MEM_REF)
2216 return false;
2218 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2219 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2220 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2222 unsigned HOST_WIDE_INT bitpos;
2223 unsigned int ref_align;
2224 get_object_alignment_1 (ref, &ref_align, &bitpos);
2225 if (ref_align < align
2226 || (bitpos % align) != 0
2227 || (bitpos % BITS_PER_UNIT) != 0)
2228 return true;
2230 unsigned int trailing_zeros = tree_ctz (step);
2231 if (trailing_zeros < HOST_BITS_PER_INT
2232 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2233 return true;
2235 return false;
2238 /* Return true if EXPR may be non-addressable. */
2240 bool
2241 may_be_nonaddressable_p (tree expr)
2243 switch (TREE_CODE (expr))
2245 case VAR_DECL:
2246 /* Check if it's a register variable. */
2247 return DECL_HARD_REGISTER (expr);
2249 case TARGET_MEM_REF:
2250 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2251 target, thus they are always addressable. */
2252 return false;
2254 case MEM_REF:
2255 /* Likewise for MEM_REFs, modulo the storage order. */
2256 return REF_REVERSE_STORAGE_ORDER (expr);
2258 case BIT_FIELD_REF:
2259 if (REF_REVERSE_STORAGE_ORDER (expr))
2260 return true;
2261 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 case COMPONENT_REF:
2264 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2265 return true;
2266 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2267 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 case ARRAY_REF:
2270 case ARRAY_RANGE_REF:
2271 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2272 return true;
2273 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275 case VIEW_CONVERT_EXPR:
2276 /* This kind of view-conversion may wrap non-addressable objects
2277 and make them look addressable. After some processing the
2278 non-addressability may be uncovered again, causing ADDR_EXPRs
2279 of inappropriate objects to be built. */
2280 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2281 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2282 return true;
2283 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2285 CASE_CONVERT:
2286 return true;
2288 default:
2289 break;
2292 return false;
2295 /* Finds addresses in *OP_P inside STMT. */
2297 static void
2298 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2299 tree *op_p)
2301 tree base = *op_p, step = size_zero_node;
2302 struct iv *civ;
2303 struct ifs_ivopts_data ifs_ivopts_data;
2305 /* Do not play with volatile memory references. A bit too conservative,
2306 perhaps, but safe. */
2307 if (gimple_has_volatile_ops (stmt))
2308 goto fail;
2310 /* Ignore bitfields for now. Not really something terribly complicated
2311 to handle. TODO. */
2312 if (TREE_CODE (base) == BIT_FIELD_REF)
2313 goto fail;
2315 base = unshare_expr (base);
2317 if (TREE_CODE (base) == TARGET_MEM_REF)
2319 tree type = build_pointer_type (TREE_TYPE (base));
2320 tree astep;
2322 if (TMR_BASE (base)
2323 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2325 civ = get_iv (data, TMR_BASE (base));
2326 if (!civ)
2327 goto fail;
2329 TMR_BASE (base) = civ->base;
2330 step = civ->step;
2332 if (TMR_INDEX2 (base)
2333 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2335 civ = get_iv (data, TMR_INDEX2 (base));
2336 if (!civ)
2337 goto fail;
2339 TMR_INDEX2 (base) = civ->base;
2340 step = civ->step;
2342 if (TMR_INDEX (base)
2343 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2345 civ = get_iv (data, TMR_INDEX (base));
2346 if (!civ)
2347 goto fail;
2349 TMR_INDEX (base) = civ->base;
2350 astep = civ->step;
2352 if (astep)
2354 if (TMR_STEP (base))
2355 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2357 step = fold_build2 (PLUS_EXPR, type, step, astep);
2361 if (integer_zerop (step))
2362 goto fail;
2363 base = tree_mem_ref_addr (type, base);
2365 else
2367 ifs_ivopts_data.ivopts_data = data;
2368 ifs_ivopts_data.stmt = stmt;
2369 ifs_ivopts_data.step = size_zero_node;
2370 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2371 || integer_zerop (ifs_ivopts_data.step))
2372 goto fail;
2373 step = ifs_ivopts_data.step;
2375 /* Check that the base expression is addressable. This needs
2376 to be done after substituting bases of IVs into it. */
2377 if (may_be_nonaddressable_p (base))
2378 goto fail;
2380 /* Moreover, on strict alignment platforms, check that it is
2381 sufficiently aligned. */
2382 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2383 goto fail;
2385 base = build_fold_addr_expr (base);
2387 /* Substituting bases of IVs into the base expression might
2388 have caused folding opportunities. */
2389 if (TREE_CODE (base) == ADDR_EXPR)
2391 tree *ref = &TREE_OPERAND (base, 0);
2392 while (handled_component_p (*ref))
2393 ref = &TREE_OPERAND (*ref, 0);
2394 if (TREE_CODE (*ref) == MEM_REF)
2396 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2397 TREE_OPERAND (*ref, 0),
2398 TREE_OPERAND (*ref, 1));
2399 if (tem)
2400 *ref = tem;
2405 civ = alloc_iv (data, base, step);
2406 /* Fail if base object of this memory reference is unknown. */
2407 if (civ->base_object == NULL_TREE)
2408 goto fail;
2410 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2411 return;
2413 fail:
2414 for_each_index (op_p, idx_record_use, data);
2417 /* Finds and records invariants used in STMT. */
2419 static void
2420 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2422 ssa_op_iter iter;
2423 use_operand_p use_p;
2424 tree op;
2426 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428 op = USE_FROM_PTR (use_p);
2429 record_invariant (data, op, false);
2433 /* CALL calls an internal function. If operand *OP_P will become an
2434 address when the call is expanded, return the type of the memory
2435 being addressed, otherwise return null. */
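/* For example (illustrative): for lhs = IFN_MASK_LOAD (ptr, ..., mask),
   passing the address of the first argument yields TREE_TYPE (lhs); for the
   masked store variants it yields the type of the stored value argument;
   any other argument or internal function yields NULL_TREE.  */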
2437 static tree
2438 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2440 switch (gimple_call_internal_fn (call))
2442 case IFN_MASK_LOAD:
2443 case IFN_MASK_LOAD_LANES:
2444 case IFN_MASK_LEN_LOAD_LANES:
2445 case IFN_LEN_LOAD:
2446 case IFN_MASK_LEN_LOAD:
2447 if (op_p == gimple_call_arg_ptr (call, 0))
2448 return TREE_TYPE (gimple_call_lhs (call));
2449 return NULL_TREE;
2451 case IFN_MASK_STORE:
2452 case IFN_MASK_STORE_LANES:
2453 case IFN_MASK_LEN_STORE_LANES:
2454 case IFN_LEN_STORE:
2455 case IFN_MASK_LEN_STORE:
2457 if (op_p == gimple_call_arg_ptr (call, 0))
2459 internal_fn ifn = gimple_call_internal_fn (call);
2460 int index = internal_fn_stored_value_index (ifn);
2461 return TREE_TYPE (gimple_call_arg (call, index));
2463 return NULL_TREE;
2466 default:
2467 return NULL_TREE;
2471 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2472 Return true if the operand will become an address when STMT
2473 is expanded and record the associated address use if so. */
2475 static bool
2476 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2477 struct iv *iv)
2479 /* Fail if base object of this memory reference is unknown. */
2480 if (iv->base_object == NULL_TREE)
2481 return false;
2483 tree mem_type = NULL_TREE;
2484 if (gcall *call = dyn_cast <gcall *> (stmt))
2485 if (gimple_call_internal_p (call))
2486 mem_type = get_mem_type_for_internal_fn (call, op_p);
2487 if (mem_type)
2489 iv = alloc_iv (data, iv->base, iv->step);
2490 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2491 return true;
2493 return false;
2496 /* Finds interesting uses of induction variables in the statement STMT. */
2498 static void
2499 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2501 struct iv *iv;
2502 tree op, *lhs, *rhs;
2503 ssa_op_iter iter;
2504 use_operand_p use_p;
2505 enum tree_code code;
2507 find_invariants_stmt (data, stmt);
2509 if (gimple_code (stmt) == GIMPLE_COND)
2511 find_interesting_uses_cond (data, stmt);
2512 return;
2515 if (is_gimple_assign (stmt))
2517 lhs = gimple_assign_lhs_ptr (stmt);
2518 rhs = gimple_assign_rhs1_ptr (stmt);
2520 if (TREE_CODE (*lhs) == SSA_NAME)
2522 /* If the statement defines an induction variable, the uses are not
2523 interesting by themselves. */
2525 iv = get_iv (data, *lhs);
2527 if (iv && !integer_zerop (iv->step))
2528 return;
2531 code = gimple_assign_rhs_code (stmt);
2532 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2533 && (REFERENCE_CLASS_P (*rhs)
2534 || is_gimple_val (*rhs)))
2536 if (REFERENCE_CLASS_P (*rhs))
2537 find_interesting_uses_address (data, stmt, rhs);
2538 else
2539 find_interesting_uses_op (data, *rhs);
2541 if (REFERENCE_CLASS_P (*lhs))
2542 find_interesting_uses_address (data, stmt, lhs);
2543 return;
2545 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2547 find_interesting_uses_cond (data, stmt);
2548 return;
2551 /* TODO -- we should also handle address uses of type
2553 memory = call (whatever);
2557 call (memory). */
2560 if (gimple_code (stmt) == GIMPLE_PHI
2561 && gimple_bb (stmt) == data->current_loop->header)
2563 iv = get_iv (data, PHI_RESULT (stmt));
2565 if (iv && !integer_zerop (iv->step))
2566 return;
2569 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2571 op = USE_FROM_PTR (use_p);
2573 if (TREE_CODE (op) != SSA_NAME)
2574 continue;
2576 iv = get_iv (data, op);
2577 if (!iv)
2578 continue;
2580 if (!find_address_like_use (data, stmt, use_p->use, iv))
2581 find_interesting_uses_op (data, op);
2585 /* Finds interesting uses of induction variables outside of loops
2586 on loop exit edge EXIT. */
2588 static void
2589 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2591 gphi *phi;
2592 gphi_iterator psi;
2593 tree def;
2595 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2597 phi = psi.phi ();
2598 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2599 if (!virtual_operand_p (def))
2600 find_interesting_uses_op (data, def);
2604 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2605 mode for memory reference represented by USE. */
2607 static GTY (()) vec<rtx, va_gc> *addr_list;
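/* A hypothetical illustration (the real answer is target-dependent): on a
   target whose load/store instructions accept a signed 16-bit displacement,
   this would return true for offsets in [-32768, 32767] and false outside
   that range, which is what drives the group splitting below.  */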
2609 static bool
2610 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2612 rtx reg, addr;
2613 unsigned list_index;
2614 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2615 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2617 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2618 if (list_index >= vec_safe_length (addr_list))
2619 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2621 addr = (*addr_list)[list_index];
2622 if (!addr)
2624 addr_mode = targetm.addr_space.address_mode (as);
2625 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2626 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2627 (*addr_list)[list_index] = addr;
2629 else
2630 addr_mode = GET_MODE (addr);
2632 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2633 return (memory_address_addr_space_p (mem_mode, addr, as));
2636 /* Comparison function to sort group in ascending order of addr_offset. */
2638 static int
2639 group_compare_offset (const void *a, const void *b)
2641 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2642 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2644 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2647 /* Check if small groups should be split. Return true if no group
2648 contains more than two uses with distinct addr_offsets. Return
2649 false otherwise. We want to split such groups because:
2651 1) Small groups don't have much benefit and may interfere with
2652 general candidate selection.
2653 2) The problem size with only small groups is usually small and the
2654 general algorithm can handle it well.
2656 TODO -- The above claim may not hold when we want to merge memory
2657 accesses with consecutive addresses. */
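/* In other words (illustrative): if every address group has at most two
   distinct addr_offsets, e.g. {0, 4}, groups are split so that only uses
   with identical offsets stay together; once some group has three or more
   distinct offsets, e.g. {0, 4, 8}, splitting is instead driven only by
   what the [base + offset] addressing mode can encode.  */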
2659 static bool
2660 split_small_address_groups_p (struct ivopts_data *data)
2662 unsigned int i, j, distinct = 1;
2663 struct iv_use *pre;
2664 struct iv_group *group;
2666 for (i = 0; i < data->vgroups.length (); i++)
2668 group = data->vgroups[i];
2669 if (group->vuses.length () == 1)
2670 continue;
2672 gcc_assert (address_p (group->type));
2673 if (group->vuses.length () == 2)
2675 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2676 group->vuses[1]->addr_offset) > 0)
2677 std::swap (group->vuses[0], group->vuses[1]);
2679 else
2680 group->vuses.qsort (group_compare_offset);
2682 if (distinct > 2)
2683 continue;
2685 distinct = 1;
2686 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2688 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2690 pre = group->vuses[j];
2691 distinct++;
2694 if (distinct > 2)
2695 break;
2699 return (distinct <= 2);
2702 /* For each group of address type uses, this function further groups
2703 these uses according to the maximum offset supported by target's
2704 [base + offset] addressing mode. */
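/* A hypothetical example: for uses *p, *(p + 4) and *(p + 65536) on a
   target that only encodes 16-bit displacements, the first two uses stay
   in one group while the third is moved into a new group, since its offset
   relative to the first use does not fit the addressing mode.  */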
2706 static void
2707 split_address_groups (struct ivopts_data *data)
2709 unsigned int i, j;
2710 /* Whether to split every group, regardless of whether its offsets fit the addressing mode. */
2711 bool split_p = split_small_address_groups_p (data);
2713 for (i = 0; i < data->vgroups.length (); i++)
2715 struct iv_group *new_group = NULL;
2716 struct iv_group *group = data->vgroups[i];
2717 struct iv_use *use = group->vuses[0];
2719 use->id = 0;
2720 use->group_id = group->id;
2721 if (group->vuses.length () == 1)
2722 continue;
2724 gcc_assert (address_p (use->type));
2726 for (j = 1; j < group->vuses.length ();)
2728 struct iv_use *next = group->vuses[j];
2729 poly_int64 offset = next->addr_offset - use->addr_offset;
2731 /* Split the group if asked to, or if the offset against the first
2732 use can't fit in the offset part of the addressing mode. IV uses
2733 having the same offset are still kept in one group. */
2734 if (maybe_ne (offset, 0)
2735 && (split_p || !addr_offset_valid_p (use, offset)))
2737 if (!new_group)
2738 new_group = record_group (data, group->type);
2739 group->vuses.ordered_remove (j);
2740 new_group->vuses.safe_push (next);
2741 continue;
2744 next->id = j;
2745 next->group_id = group->id;
2746 j++;
2751 /* Finds uses of the induction variables that are interesting. */
2753 static void
2754 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2756 basic_block bb;
2757 gimple_stmt_iterator bsi;
2758 unsigned i;
2759 edge e;
2761 for (i = 0; i < data->current_loop->num_nodes; i++)
2763 edge_iterator ei;
2764 bb = body[i];
2766 FOR_EACH_EDGE (e, ei, bb->succs)
2767 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2768 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2769 find_interesting_uses_outside (data, e);
2771 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2772 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2773 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2774 if (!is_gimple_debug (gsi_stmt (bsi)))
2775 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2778 split_address_groups (data);
2780 if (dump_file && (dump_flags & TDF_DETAILS))
2782 fprintf (dump_file, "\n<IV Groups>:\n");
2783 dump_groups (dump_file, data);
2784 fprintf (dump_file, "\n");
2788 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2789 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2790 we are at the top-level of the processed address. */
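/* Small illustrative cases: for EXPR = &a[i + 3] with 4-byte array
   elements, the ADDR_EXPR and ARRAY_REF cases combine to return &a[i] with
   *OFFSET set to 12; for EXPR = p + 16 (a POINTER_PLUS_EXPR) the result is
   p with *OFFSET set to 16.  */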
2792 static tree
2793 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2794 poly_int64 *offset)
2796 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2797 enum tree_code code;
2798 tree type, orig_type = TREE_TYPE (expr);
2799 poly_int64 off0, off1;
2800 HOST_WIDE_INT st;
2801 tree orig_expr = expr;
2803 STRIP_NOPS (expr);
2805 type = TREE_TYPE (expr);
2806 code = TREE_CODE (expr);
2807 *offset = 0;
2809 switch (code)
2811 case POINTER_PLUS_EXPR:
2812 case PLUS_EXPR:
2813 case MINUS_EXPR:
2814 op0 = TREE_OPERAND (expr, 0);
2815 op1 = TREE_OPERAND (expr, 1);
2817 op0 = strip_offset_1 (op0, false, false, &off0);
2818 op1 = strip_offset_1 (op1, false, false, &off1);
2820 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2821 if (op0 == TREE_OPERAND (expr, 0)
2822 && op1 == TREE_OPERAND (expr, 1))
2823 return orig_expr;
2825 if (integer_zerop (op1))
2826 expr = op0;
2827 else if (integer_zerop (op0))
2829 if (code == MINUS_EXPR)
2830 expr = fold_build1 (NEGATE_EXPR, type, op1);
2831 else
2832 expr = op1;
2834 else
2835 expr = fold_build2 (code, type, op0, op1);
2837 return fold_convert (orig_type, expr);
2839 case MULT_EXPR:
2840 op1 = TREE_OPERAND (expr, 1);
2841 if (!cst_and_fits_in_hwi (op1))
2842 return orig_expr;
2844 op0 = TREE_OPERAND (expr, 0);
2845 op0 = strip_offset_1 (op0, false, false, &off0);
2846 if (op0 == TREE_OPERAND (expr, 0))
2847 return orig_expr;
2849 *offset = off0 * int_cst_value (op1);
2850 if (integer_zerop (op0))
2851 expr = op0;
2852 else
2853 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2855 return fold_convert (orig_type, expr);
2857 case ARRAY_REF:
2858 case ARRAY_RANGE_REF:
2859 if (!inside_addr)
2860 return orig_expr;
2862 step = array_ref_element_size (expr);
2863 if (!cst_and_fits_in_hwi (step))
2864 break;
2866 st = int_cst_value (step);
2867 op1 = TREE_OPERAND (expr, 1);
2868 op1 = strip_offset_1 (op1, false, false, &off1);
2869 *offset = off1 * st;
2871 if (top_compref
2872 && integer_zerop (op1))
2874 /* Strip the component reference completely. */
2875 op0 = TREE_OPERAND (expr, 0);
2876 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2877 *offset += off0;
2878 return op0;
2880 break;
2882 case COMPONENT_REF:
2884 tree field;
2886 if (!inside_addr)
2887 return orig_expr;
2889 tmp = component_ref_field_offset (expr);
2890 field = TREE_OPERAND (expr, 1);
2891 if (top_compref
2892 && cst_and_fits_in_hwi (tmp)
2893 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2895 HOST_WIDE_INT boffset, abs_off;
2897 /* Strip the component reference completely. */
2898 op0 = TREE_OPERAND (expr, 0);
2899 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2900 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2901 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2902 if (boffset < 0)
2903 abs_off = -abs_off;
2905 *offset = off0 + int_cst_value (tmp) + abs_off;
2906 return op0;
2909 break;
2911 case ADDR_EXPR:
2912 op0 = TREE_OPERAND (expr, 0);
2913 op0 = strip_offset_1 (op0, true, true, &off0);
2914 *offset += off0;
2916 if (op0 == TREE_OPERAND (expr, 0))
2917 return orig_expr;
2919 expr = build_fold_addr_expr (op0);
2920 return fold_convert (orig_type, expr);
2922 case MEM_REF:
2923 /* ??? Offset operand? */
2924 inside_addr = false;
2925 break;
2927 default:
2928 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2929 return build_int_cst (orig_type, 0);
2930 return orig_expr;
2933 /* Default handling of expressions for which we want to recurse into
2934 the first operand. */
2935 op0 = TREE_OPERAND (expr, 0);
2936 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2937 *offset += off0;
2939 if (op0 == TREE_OPERAND (expr, 0)
2940 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2941 return orig_expr;
2943 expr = copy_node (expr);
2944 TREE_OPERAND (expr, 0) = op0;
2945 if (op1)
2946 TREE_OPERAND (expr, 1) = op1;
2948 /* Inside an address, we might strip the top-level component references,
2949 thus changing the type of the expression. Handling of ADDR_EXPR
2950 will fix that. */
2951 expr = fold_convert (orig_type, expr);
2953 return expr;
2956 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2958 static tree
2959 strip_offset (tree expr, poly_uint64_pod *offset)
2961 poly_int64 off;
2962 tree core = strip_offset_1 (expr, false, false, &off);
2963 *offset = off;
2964 return core;
2967 /* Returns variant of TYPE that can be used as base for different uses.
2968 We return unsigned type with the same precision, which avoids problems
2969 with overflows. */
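/* E.g. (illustrative): int becomes unsigned int, unsigned int is returned
   unchanged, and a pointer type becomes an unsigned integer type of the
   same precision.  */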
2971 static tree
2972 generic_type_for (tree type)
2974 if (POINTER_TYPE_P (type))
2975 return unsigned_type_for (type);
2977 if (TYPE_UNSIGNED (type))
2978 return type;
2980 return unsigned_type_for (type);
2983 /* Private data for walk_tree. */
2985 struct walk_tree_data
2987 bitmap *inv_vars;
2988 struct ivopts_data *idata;
2991 /* Callback function for walk_tree, it records invariants and symbol
2992 reference in *EXPR_P. DATA is the structure storing result info. */
2994 static tree
2995 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2997 tree op = *expr_p;
2998 struct version_info *info;
2999 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3001 if (TREE_CODE (op) != SSA_NAME)
3002 return NULL_TREE;
3004 info = name_info (wdata->idata, op);
3005 /* Because we expand simple operations when finding IVs, a loop invariant
3006 variable that isn't referred to by the original loop could be used now.
3007 Record such invariant variables here. */
3008 if (!info->iv)
3010 struct ivopts_data *idata = wdata->idata;
3011 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3013 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3015 tree steptype = TREE_TYPE (op);
3016 if (POINTER_TYPE_P (steptype))
3017 steptype = sizetype;
3018 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3019 record_invariant (idata, op, false);
3022 if (!info->inv_id || info->has_nonlin_use)
3023 return NULL_TREE;
3025 if (!*wdata->inv_vars)
3026 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3027 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3029 return NULL_TREE;
3032 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3033 store them. */
3035 static inline void
3036 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3038 struct walk_tree_data wdata;
3040 if (!inv_vars)
3041 return;
3043 wdata.idata = data;
3044 wdata.inv_vars = inv_vars;
3045 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3048 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3049 will be recorded if it doesn't exist yet. Given the two exprs below:
3050 inv_expr + cst1, inv_expr + cst2
3051 it's hard to decide whether the constant part should be stripped
3052 or not. We choose not to strip it, based on the following facts:
3053 1) We would need to count the ADD cost for the constant part if it's
3054 stripped, which isn't always trivial where this function is called.
3055 2) Stripping the constant away may conflict with the subsequent loop
3056 invariant hoisting pass.
3057 3) Not stripping the constant away results in more invariant exprs,
3058 which usually leads to a decision preferring lower register pressure. */
3060 static iv_inv_expr_ent *
3061 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3063 STRIP_NOPS (inv_expr);
3065 if (poly_int_tree_p (inv_expr)
3066 || TREE_CODE (inv_expr) == SSA_NAME)
3067 return NULL;
3069 /* Don't strip constant part away as we used to. */
3071 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3072 struct iv_inv_expr_ent ent;
3073 ent.expr = inv_expr;
3074 ent.hash = iterative_hash_expr (inv_expr, 0);
3075 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3077 if (!*slot)
3079 *slot = XNEW (struct iv_inv_expr_ent);
3080 (*slot)->expr = inv_expr;
3081 (*slot)->hash = ent.hash;
3082 (*slot)->id = ++data->max_inv_expr_id;
3085 return *slot;
3089 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3090 unsuitable as ivopts candidates for potentially involving undefined
3091 behavior. */
3093 static tree
3094 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3096 basic_block bb = (basic_block) bb_;
3097 if (TREE_CODE (*tp) == SSA_NAME
3098 && ssa_name_maybe_undef_p (*tp)
3099 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3100 return *tp;
3101 if (!EXPR_P (*tp))
3102 *walk_subtrees = 0;
3103 return NULL;
3106 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3107 position to POS. If USE is not NULL, the candidate is set as related to
3108 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3109 replacement of the final value of the iv by a direct computation. */
3111 static struct iv_cand *
3112 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3113 enum iv_position pos, struct iv_use *use,
3114 gimple *incremented_at, struct iv *orig_iv = NULL,
3115 bool doloop = false)
3117 unsigned i;
3118 struct iv_cand *cand = NULL;
3119 tree type, orig_type;
3121 gcc_assert (base && step);
3123 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3124 live, but the ivopts code may replace a real pointer with one
3125 pointing before or after the memory block that is then adjusted
3126 into the memory block during the loop. FIXME: It would likely be
3127 better to actually force the pointer live and still use ivopts;
3128 for example, it would be enough to write the pointer into memory
3129 and keep it there until after the loop. */
3130 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3131 return NULL;
3133 /* If BASE contains undefined SSA names make sure we only record
3134 the original IV. */
3135 bool involves_undefs = false;
3136 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3138 if (pos != IP_ORIGINAL)
3139 return NULL;
3140 important = false;
3141 involves_undefs = true;
3144 /* For non-original variables, make sure their values are computed in a type
3145 that does not invoke undefined behavior on overflows (since in general,
3146 we cannot prove that these induction variables are non-wrapping). */
3147 if (pos != IP_ORIGINAL)
3149 orig_type = TREE_TYPE (base);
3150 type = generic_type_for (orig_type);
3151 if (type != orig_type)
3153 base = fold_convert (type, base);
3154 step = fold_convert (type, step);
3158 for (i = 0; i < data->vcands.length (); i++)
3160 cand = data->vcands[i];
3162 if (cand->pos != pos)
3163 continue;
3165 if (cand->incremented_at != incremented_at
3166 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3167 && cand->ainc_use != use))
3168 continue;
3170 if (operand_equal_p (base, cand->iv->base, 0)
3171 && operand_equal_p (step, cand->iv->step, 0)
3172 && (TYPE_PRECISION (TREE_TYPE (base))
3173 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3174 break;
3177 if (i == data->vcands.length ())
3179 cand = XCNEW (struct iv_cand);
3180 cand->id = i;
3181 cand->iv = alloc_iv (data, base, step);
3182 cand->pos = pos;
3183 if (pos != IP_ORIGINAL)
3185 if (doloop)
3186 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3187 else
3188 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3189 cand->var_after = cand->var_before;
3191 cand->important = important;
3192 cand->involves_undefs = involves_undefs;
3193 cand->incremented_at = incremented_at;
3194 cand->doloop_p = doloop;
3195 data->vcands.safe_push (cand);
3197 if (!poly_int_tree_p (step))
3199 find_inv_vars (data, &step, &cand->inv_vars);
3201 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3202 /* Share bitmap between inv_vars and inv_exprs for cand. */
3203 if (inv_expr != NULL)
3205 cand->inv_exprs = cand->inv_vars;
3206 cand->inv_vars = NULL;
3207 if (cand->inv_exprs)
3208 bitmap_clear (cand->inv_exprs);
3209 else
3210 cand->inv_exprs = BITMAP_ALLOC (NULL);
3212 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3216 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3217 cand->ainc_use = use;
3218 else
3219 cand->ainc_use = NULL;
3221 cand->orig_iv = orig_iv;
3222 if (dump_file && (dump_flags & TDF_DETAILS))
3223 dump_cand (dump_file, cand);
3226 cand->important |= important;
3227 cand->doloop_p |= doloop;
3229 /* Relate candidate to the group for which it is added. */
3230 if (use)
3231 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3233 return cand;
3236 /* Returns true if incrementing the induction variable at the end of the LOOP
3237 is allowed.
3239 The purpose is to avoid splitting the latch edge with a biv increment, thus
3240 creating a jump, possibly confusing other optimization passes and leaving
3241 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3242 available (so we do not have a better alternative), or if the latch edge
3243 is already nonempty. */
3245 static bool
3246 allow_ip_end_pos_p (class loop *loop)
3248 if (!ip_normal_pos (loop))
3249 return true;
3251 if (!empty_block_p (ip_end_pos (loop)))
3252 return true;
3254 return false;
3257 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3258 Important field is set to IMPORTANT. */
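/* Sketch of the idea (whether it applies is target-dependent): for an
   address use *p whose step is 4 bytes on a target with 4-byte
   post-increment accesses, a candidate is added at IP_AFTER_USE so the
   access can become a single auto-increment instruction; targets with
   pre-increment accesses instead get an IP_BEFORE_USE candidate whose base
   is adjusted by one step.  */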
3260 static void
3261 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3262 bool important, struct iv_use *use)
3264 basic_block use_bb = gimple_bb (use->stmt);
3265 machine_mode mem_mode;
3266 unsigned HOST_WIDE_INT cstepi;
3268 /* If we insert the increment in any position other than the standard
3269 ones, we must ensure that it is incremented once per iteration.
3270 It must not be in an inner nested loop, or one side of an if
3271 statement. */
3272 if (use_bb->loop_father != data->current_loop
3273 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3274 || stmt_can_throw_internal (cfun, use->stmt)
3275 || !cst_and_fits_in_hwi (step))
3276 return;
3278 cstepi = int_cst_value (step);
3280 mem_mode = TYPE_MODE (use->mem_type);
3281 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3282 || USE_STORE_PRE_INCREMENT (mem_mode))
3283 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3284 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3285 || USE_STORE_PRE_DECREMENT (mem_mode))
3286 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3288 enum tree_code code = MINUS_EXPR;
3289 tree new_base;
3290 tree new_step = step;
3292 if (POINTER_TYPE_P (TREE_TYPE (base)))
3294 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3295 code = POINTER_PLUS_EXPR;
3297 else
3298 new_step = fold_convert (TREE_TYPE (base), new_step);
3299 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3300 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3301 use->stmt);
3303 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3304 || USE_STORE_POST_INCREMENT (mem_mode))
3305 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3306 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3307 || USE_STORE_POST_DECREMENT (mem_mode))
3308 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3310 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3311 use->stmt);
3315 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3316 position to POS. If USE is not NULL, the candidate is set as related to
3317 it. The candidate computation is scheduled before the exit condition and at
3318 the end of the loop. */
3320 static void
3321 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3322 struct iv_use *use, struct iv *orig_iv = NULL,
3323 bool doloop = false)
3325 if (ip_normal_pos (data->current_loop))
3326 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3327 doloop);
3328 /* Exclude doloop candidate here since it requires decrement then comparison
3329 and jump, the IP_END position doesn't match. */
3330 if (!doloop && ip_end_pos (data->current_loop)
3331 && allow_ip_end_pos_p (data->current_loop))
3332 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3335 /* Adds standard iv candidates. */
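/* Concretely (illustrative): these are the canonical counters 0, 1, 2, ...
   in int, plus the same counters in long and long long when those types are
   wider than the previous one yet still fit in a machine word.  */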
3337 static void
3338 add_standard_iv_candidates (struct ivopts_data *data)
3340 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3342 /* The same for a double-integer type if it is still fast enough. */
3343 if (TYPE_PRECISION
3344 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3345 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3346 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3347 build_int_cst (long_integer_type_node, 1), true, NULL);
3349 /* The same for a double-integer type if it is still fast enough. */
3350 if (TYPE_PRECISION
3351 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3352 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3353 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3354 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3358 /* Adds candidates based on the old induction variable IV. */
3360 static void
3361 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3363 gimple *phi;
3364 tree def;
3365 struct iv_cand *cand;
3367 /* Check if this biv is used in address type use. */
3368 if (iv->no_overflow && iv->have_address_use
3369 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3370 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3372 tree base = fold_convert (sizetype, iv->base);
3373 tree step = fold_convert (sizetype, iv->step);
3375 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3376 add_candidate (data, base, step, true, NULL, iv);
3377 /* Add iv cand of the original type only if it has nonlinear use. */
3378 if (iv->nonlin_use)
3379 add_candidate (data, iv->base, iv->step, true, NULL);
3381 else
3382 add_candidate (data, iv->base, iv->step, true, NULL);
3384 /* The same, but with initial value zero. */
3385 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3386 add_candidate (data, size_int (0), iv->step, true, NULL);
3387 else
3388 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3389 iv->step, true, NULL);
3391 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3392 if (gimple_code (phi) == GIMPLE_PHI)
3394 /* Additionally record the possibility of leaving the original iv
3395 untouched. */
3396 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3397 /* Don't add candidate if it's from another PHI node because
3398 it's an affine iv appearing in the form of PEELED_CHREC. */
3399 phi = SSA_NAME_DEF_STMT (def);
3400 if (gimple_code (phi) != GIMPLE_PHI)
3402 cand = add_candidate_1 (data,
3403 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3404 SSA_NAME_DEF_STMT (def));
3405 if (cand)
3407 cand->var_before = iv->ssa_name;
3408 cand->var_after = def;
3411 else
3412 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3416 /* Adds candidates based on the old induction variables. */
3418 static void
3419 add_iv_candidate_for_bivs (struct ivopts_data *data)
3421 unsigned i;
3422 struct iv *iv;
3423 bitmap_iterator bi;
3425 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3427 iv = ver_info (data, i)->iv;
3428 if (iv && iv->biv_p && !integer_zerop (iv->step))
3429 add_iv_candidate_for_biv (data, iv);
3433 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3435 static void
3436 record_common_cand (struct ivopts_data *data, tree base,
3437 tree step, struct iv_use *use)
3439 class iv_common_cand ent;
3440 class iv_common_cand **slot;
3442 ent.base = base;
3443 ent.step = step;
3444 ent.hash = iterative_hash_expr (base, 0);
3445 ent.hash = iterative_hash_expr (step, ent.hash);
3447 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3448 if (*slot == NULL)
3450 *slot = new iv_common_cand ();
3451 (*slot)->base = base;
3452 (*slot)->step = step;
3453 (*slot)->uses.create (8);
3454 (*slot)->hash = ent.hash;
3455 data->iv_common_cands.safe_push ((*slot));
3458 gcc_assert (use != NULL);
3459 (*slot)->uses.safe_push (use);
3460 return;
3463 /* Comparison function used to sort common candidates. */
3465 static int
3466 common_cand_cmp (const void *p1, const void *p2)
3468 unsigned n1, n2;
3469 const class iv_common_cand *const *const ccand1
3470 = (const class iv_common_cand *const *)p1;
3471 const class iv_common_cand *const *const ccand2
3472 = (const class iv_common_cand *const *)p2;
3474 n1 = (*ccand1)->uses.length ();
3475 n2 = (*ccand2)->uses.length ();
3476 return n2 - n1;
3479 /* Adds IV candidates based on the common candidates recorded. */
3481 static void
3482 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3484 unsigned i, j;
3485 struct iv_cand *cand_1, *cand_2;
3487 data->iv_common_cands.qsort (common_cand_cmp);
3488 for (i = 0; i < data->iv_common_cands.length (); i++)
3490 class iv_common_cand *ptr = data->iv_common_cands[i];
3492 /* Only add IV candidate if it's derived from multiple uses. */
3493 if (ptr->uses.length () <= 1)
3494 break;
3496 cand_1 = NULL;
3497 cand_2 = NULL;
3498 if (ip_normal_pos (data->current_loop))
3499 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3500 false, IP_NORMAL, NULL, NULL);
3502 if (ip_end_pos (data->current_loop)
3503 && allow_ip_end_pos_p (data->current_loop))
3504 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3505 false, IP_END, NULL, NULL);
3507 /* Bind deriving uses and the new candidates. */
3508 for (j = 0; j < ptr->uses.length (); j++)
3510 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3511 if (cand_1)
3512 bitmap_set_bit (group->related_cands, cand_1->id);
3513 if (cand_2)
3514 bitmap_set_bit (group->related_cands, cand_2->id);
3518 /* Release data since it is useless from this point. */
3519 data->iv_common_cand_tab->empty ();
3520 data->iv_common_cands.truncate (0);
3523 /* Adds candidates based on the value of USE's iv. */
3525 static void
3526 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3528 poly_uint64 offset;
3529 tree base;
3530 struct iv *iv = use->iv;
3531 tree basetype = TREE_TYPE (iv->base);
3533 /* Don't add a candidate for an iv_use whose type is neither integer nor
3534 pointer, or doesn't have mode precision. Instead, add a candidate for the
3535 corresponding scev in an unsigned type with the same precision. See PR93674 for more info. */
3536 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3537 || !type_has_mode_precision_p (basetype))
3539 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3540 TYPE_UNSIGNED (basetype));
3541 add_candidate (data, fold_convert (basetype, iv->base),
3542 fold_convert (basetype, iv->step), false, NULL);
3543 return;
3546 add_candidate (data, iv->base, iv->step, false, use);
3548 /* Record common candidate for use in case it can be shared by others. */
3549 record_common_cand (data, iv->base, iv->step, use);
3551 /* Record common candidate with initial value zero. */
3552 basetype = TREE_TYPE (iv->base);
3553 if (POINTER_TYPE_P (basetype))
3554 basetype = sizetype;
3555 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3557 /* Compare the cost of an address with an unscaled index with the cost of
3558 an address with a scaled index and add candidate if useful. */
3559 poly_int64 step;
3560 if (use != NULL
3561 && poly_int_tree_p (iv->step, &step)
3562 && address_p (use->type))
3564 poly_int64 new_step;
3565 unsigned int fact = preferred_mem_scale_factor
3566 (use->iv->base,
3567 TYPE_MODE (use->mem_type),
3568 optimize_loop_for_speed_p (data->current_loop));
3570 if (fact != 1
3571 && multiple_p (step, fact, &new_step))
3572 add_candidate (data, size_int (0),
3573 wide_int_to_tree (sizetype, new_step),
3574 true, NULL);
3577 /* Record common candidate with constant offset stripped in base.
3578 Like the use itself, we also add candidate directly for it. */
3579 base = strip_offset (iv->base, &offset);
3580 if (maybe_ne (offset, 0U) || base != iv->base)
3582 record_common_cand (data, base, iv->step, use);
3583 add_candidate (data, base, iv->step, false, use);
3586 /* Record common candidate with base_object removed in base. */
3587 base = iv->base;
3588 STRIP_NOPS (base);
3589 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3591 tree step = iv->step;
3593 STRIP_NOPS (step);
3594 base = TREE_OPERAND (base, 1);
3595 step = fold_convert (sizetype, step);
3596 record_common_cand (data, base, step, use);
3597 /* Also record common candidate with offset stripped. */
3598 tree alt_base, alt_offset;
3599 split_constant_offset (base, &alt_base, &alt_offset);
3600 if (!integer_zerop (alt_offset))
3601 record_common_cand (data, alt_base, step, use);
3604 /* Finally, add auto-increment candidates. Make such variables
3605 important since other iv uses with the same base object may be based
3606 on them. */
3607 if (use != NULL && address_p (use->type))
3608 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3611 /* Adds candidates based on the uses. */
3613 static void
3614 add_iv_candidate_for_groups (struct ivopts_data *data)
3616 unsigned i;
3618 /* Only add candidate for the first use in group. */
3619 for (i = 0; i < data->vgroups.length (); i++)
3621 struct iv_group *group = data->vgroups[i];
3623 gcc_assert (group->vuses[0] != NULL);
3624 add_iv_candidate_for_use (data, group->vuses[0]);
3626 add_iv_candidate_derived_from_uses (data);
3629 /* Record important candidates and add them to related_cands bitmaps. */
3631 static void
3632 record_important_candidates (struct ivopts_data *data)
3634 unsigned i;
3635 struct iv_group *group;
3637 for (i = 0; i < data->vcands.length (); i++)
3639 struct iv_cand *cand = data->vcands[i];
3641 if (cand->important)
3642 bitmap_set_bit (data->important_candidates, i);
3645 data->consider_all_candidates = (data->vcands.length ()
3646 <= CONSIDER_ALL_CANDIDATES_BOUND);
3648 /* Add important candidates to groups' related_cands bitmaps. */
3649 for (i = 0; i < data->vgroups.length (); i++)
3651 group = data->vgroups[i];
3652 bitmap_ior_into (group->related_cands, data->important_candidates);
3656 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3657 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3658 we allocate a simple list to every use. */
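/* For instance (illustrative): a group related to 5 candidates gets a cost
   map of size 8, the next power of two, so the open-addressing lookup in
   get_group_iv_cost can reduce a candidate id modulo the size with a simple
   mask.  */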
3660 static void
3661 alloc_use_cost_map (struct ivopts_data *data)
3663 unsigned i, size, s;
3665 for (i = 0; i < data->vgroups.length (); i++)
3667 struct iv_group *group = data->vgroups[i];
3669 if (data->consider_all_candidates)
3670 size = data->vcands.length ();
3671 else
3673 s = bitmap_count_bits (group->related_cands);
3675 /* Round up to a power of two, so that computing the modulo by it is fast. */
3676 size = s ? (1 << ceil_log2 (s)) : 1;
3679 group->n_map_members = size;
3680 group->cost_map = XCNEWVEC (class cost_pair, size);
3684 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3685 depends on invariants INV_VARS, that the value used in expressing it is
3686 VALUE, and that in case of iv elimination the comparison operator is COMP. */
3688 static void
3689 set_group_iv_cost (struct ivopts_data *data,
3690 struct iv_group *group, struct iv_cand *cand,
3691 comp_cost cost, bitmap inv_vars, tree value,
3692 enum tree_code comp, bitmap inv_exprs)
3694 unsigned i, s;
3696 if (cost.infinite_cost_p ())
3698 BITMAP_FREE (inv_vars);
3699 BITMAP_FREE (inv_exprs);
3700 return;
3703 if (data->consider_all_candidates)
3705 group->cost_map[cand->id].cand = cand;
3706 group->cost_map[cand->id].cost = cost;
3707 group->cost_map[cand->id].inv_vars = inv_vars;
3708 group->cost_map[cand->id].inv_exprs = inv_exprs;
3709 group->cost_map[cand->id].value = value;
3710 group->cost_map[cand->id].comp = comp;
3711 return;
3714 /* n_map_members is a power of two, so this computes modulo. */
3715 s = cand->id & (group->n_map_members - 1);
3716 for (i = s; i < group->n_map_members; i++)
3717 if (!group->cost_map[i].cand)
3718 goto found;
3719 for (i = 0; i < s; i++)
3720 if (!group->cost_map[i].cand)
3721 goto found;
3723 gcc_unreachable ();
3725 found:
3726 group->cost_map[i].cand = cand;
3727 group->cost_map[i].cost = cost;
3728 group->cost_map[i].inv_vars = inv_vars;
3729 group->cost_map[i].inv_exprs = inv_exprs;
3730 group->cost_map[i].value = value;
3731 group->cost_map[i].comp = comp;
3734 /* Gets cost of (GROUP, CAND) pair. */
3736 static class cost_pair *
3737 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3738 struct iv_cand *cand)
3740 unsigned i, s;
3741 class cost_pair *ret;
3743 if (!cand)
3744 return NULL;
3746 if (data->consider_all_candidates)
3748 ret = group->cost_map + cand->id;
3749 if (!ret->cand)
3750 return NULL;
3752 return ret;
3755 /* n_map_members is a power of two, so this computes modulo. */
3756 s = cand->id & (group->n_map_members - 1);
3757 for (i = s; i < group->n_map_members; i++)
3758 if (group->cost_map[i].cand == cand)
3759 return group->cost_map + i;
3760 else if (group->cost_map[i].cand == NULL)
3761 return NULL;
3762 for (i = 0; i < s; i++)
3763 if (group->cost_map[i].cand == cand)
3764 return group->cost_map + i;
3765 else if (group->cost_map[i].cand == NULL)
3766 return NULL;
3768 return NULL;
3771 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3772 static rtx
3773 produce_memory_decl_rtl (tree obj, int *regno)
3775 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3776 machine_mode address_mode = targetm.addr_space.address_mode (as);
3777 rtx x;
3779 gcc_assert (obj);
3780 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3782 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3783 x = gen_rtx_SYMBOL_REF (address_mode, name);
3784 SET_SYMBOL_REF_DECL (x, obj);
3785 x = gen_rtx_MEM (DECL_MODE (obj), x);
3786 set_mem_addr_space (x, as);
3787 targetm.encode_section_info (obj, x, true);
3789 else
3791 x = gen_raw_REG (address_mode, (*regno)++);
3792 x = gen_rtx_MEM (DECL_MODE (obj), x);
3793 set_mem_addr_space (x, as);
3796 return x;
3799 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3800 walk_tree. DATA contains the actual fake register number. */
3802 static tree
3803 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3805 tree obj = NULL_TREE;
3806 rtx x = NULL_RTX;
3807 int *regno = (int *) data;
3809 switch (TREE_CODE (*expr_p))
3811 case ADDR_EXPR:
3812 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3813 handled_component_p (*expr_p);
3814 expr_p = &TREE_OPERAND (*expr_p, 0))
3815 continue;
3816 obj = *expr_p;
3817 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3818 x = produce_memory_decl_rtl (obj, regno);
3819 break;
3821 case SSA_NAME:
3822 *ws = 0;
3823 obj = SSA_NAME_VAR (*expr_p);
3824 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3825 if (!obj)
3826 return NULL_TREE;
3827 if (!DECL_RTL_SET_P (obj))
3828 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3829 break;
3831 case VAR_DECL:
3832 case PARM_DECL:
3833 case RESULT_DECL:
3834 *ws = 0;
3835 obj = *expr_p;
3837 if (DECL_RTL_SET_P (obj))
3838 break;
3840 if (DECL_MODE (obj) == BLKmode)
3841 x = produce_memory_decl_rtl (obj, regno);
3842 else
3843 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3845 break;
3847 default:
3848 break;
3851 if (x)
3853 decl_rtl_to_reset.safe_push (obj);
3854 SET_DECL_RTL (obj, x);
3857 return NULL_TREE;
3860 /* Predict whether the given loop will be transformed in the RTL
3861 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3862 This is only for target independent checks, see targetm.predict_doloop_p
3863 for the target dependent ones.
3865 Note that according to some initial investigation, some checks, like the
3866 costly niter check and invalid stmt scanning, don't bring much gain in
3867 general cases, so keep this as simple as possible first.
3869 Some RTL specific checks seem impossible to perform on gimple; if any new
3870 checks or easy checks _are_ missing here, please add them. */
3872 static bool
3873 generic_predict_doloop_p (struct ivopts_data *data)
3875 class loop *loop = data->current_loop;
3877 /* Call target hook for target dependent checks. */
3878 if (!targetm.predict_doloop_p (loop))
3880 if (dump_file && (dump_flags & TDF_DETAILS))
3881 fprintf (dump_file, "Predict doloop failure due to"
3882 " target specific checks.\n");
3883 return false;
3886 /* Similar to doloop_optimize, check iteration description to know it's
3887 suitable or not. Keep it as simple as possible, feel free to extend it
3888 if you find any multiple exits cases matter. */
3889 edge exit = single_dom_exit (loop);
3890 class tree_niter_desc *niter_desc;
3891 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3893 if (dump_file && (dump_flags & TDF_DETAILS))
3894 fprintf (dump_file, "Predict doloop failure due to"
3895 " unexpected niters.\n");
3896 return false;
3899 /* Similar to doloop_optimize, check whether the iteration count is too
3900 small and not profitable. */
3901 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3902 if (est_niter == -1)
3903 est_niter = get_likely_max_loop_iterations_int (loop);
3904 if (est_niter >= 0 && est_niter < 3)
3906 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file,
3908 "Predict doloop failure due to"
3909 " too few iterations (%u).\n",
3910 (unsigned int) est_niter);
3911 return false;
3914 return true;
3917 /* Determines cost of the computation of EXPR. */
3919 static unsigned
3920 computation_cost (tree expr, bool speed)
3922 rtx_insn *seq;
3923 rtx rslt;
3924 tree type = TREE_TYPE (expr);
3925 unsigned cost;
3926 /* Avoid using hard regs in ways which may be unsupported. */
3927 int regno = LAST_VIRTUAL_REGISTER + 1;
3928 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3929 enum node_frequency real_frequency = node->frequency;
3931 node->frequency = NODE_FREQUENCY_NORMAL;
3932 crtl->maybe_hot_insn_p = speed;
3933 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3934 start_sequence ();
3935 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3936 seq = get_insns ();
3937 end_sequence ();
3938 default_rtl_profile ();
3939 node->frequency = real_frequency;
3941 cost = seq_cost (seq, speed);
3942 if (MEM_P (rslt))
3943 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3944 TYPE_ADDR_SPACE (type), speed);
3945 else if (!REG_P (rslt))
3946 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3948 return cost;
3951 /* Returns variable containing the value of candidate CAND at statement AT. */
3953 static tree
3954 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3956 if (stmt_after_increment (loop, cand, stmt))
3957 return cand->var_after;
3958 else
3959 return cand->var_before;
3962 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3963 same precision that is at least as wide as the precision of TYPE, stores
3964 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3965 type of A and B. */
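/* E.g. (illustrative): if *A is (unsigned int) a64 and *B is
   (unsigned int) b64 with 64-bit operands a64 and b64, *A and *B are
   replaced by a64 and b64 and their 64-bit type is returned, so that the
   caller can compute the difference in the wider type before casting the
   result back.  */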
3967 static tree
3968 determine_common_wider_type (tree *a, tree *b)
3970 tree wider_type = NULL;
3971 tree suba, subb;
3972 tree atype = TREE_TYPE (*a);
3974 if (CONVERT_EXPR_P (*a))
3976 suba = TREE_OPERAND (*a, 0);
3977 wider_type = TREE_TYPE (suba);
3978 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3979 return atype;
3981 else
3982 return atype;
3984 if (CONVERT_EXPR_P (*b))
3986 subb = TREE_OPERAND (*b, 0);
3987 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3988 return atype;
3990 else
3991 return atype;
3993 *a = suba;
3994 *b = subb;
3995 return wider_type;
3998 /* Determines the expression by which USE is expressed from induction variable
3999 CAND at statement AT in LOOP. The expression is stored in two parts in a
4000 decomposed form. The invariant part is stored in AFF_INV; while variant
4001 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
4002 non-null. Returns false if USE cannot be expressed using CAND. */
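/* A simple illustrative case: if USE has {base 0, step 4} and CAND has
   {base 0, step 1}, the ratio is 4, so following
   use = ubase - ratio * cbase + ratio * var
   AFF_INV becomes 0 and AFF_VAR becomes 4 * var.  */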
4004 static bool
4005 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
4006 struct iv_cand *cand, class aff_tree *aff_inv,
4007 class aff_tree *aff_var, widest_int *prat = NULL)
4009 tree ubase = use->iv->base, ustep = use->iv->step;
4010 tree cbase = cand->iv->base, cstep = cand->iv->step;
4011 tree common_type, uutype, var, cstep_common;
4012 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4013 aff_tree aff_cbase;
4014 widest_int rat;
4016 /* We must have a precision to express the values of use. */
4017 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4018 return false;
4020 var = var_at_stmt (loop, cand, at);
4021 uutype = unsigned_type_for (utype);
4023 /* If the conversion is not noop, perform it. */
4024 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4026 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4027 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4029 tree inner_base, inner_step, inner_type;
4030 inner_base = TREE_OPERAND (cbase, 0);
4031 if (CONVERT_EXPR_P (cstep))
4032 inner_step = TREE_OPERAND (cstep, 0);
4033 else
4034 inner_step = cstep;
4036 inner_type = TREE_TYPE (inner_base);
4037 /* If the candidate was added from a biv whose type is smaller than
4038 ctype, we know that neither the candidate nor the biv overflows.
4039 In this case, it's safe to skip the conversion in the candidate.
4040 As an example, (unsigned short)((unsigned long)A) equals
4041 (unsigned short)A, if A has a type no larger than short. */
4042 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4044 cbase = inner_base;
4045 cstep = inner_step;
4048 cbase = fold_convert (uutype, cbase);
4049 cstep = fold_convert (uutype, cstep);
4050 var = fold_convert (uutype, var);
4053 /* Ratio is 1 when computing the value of biv cand by itself.
4054 We can't rely on constant_multiple_of in this case because the
4055 use is created after the original biv is selected. The call
4056 could fail because of inconsistent fold behavior. See PR68021
4057 for more information. */
4058 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4060 gcc_assert (is_gimple_assign (use->stmt));
4061 gcc_assert (use->iv->ssa_name == cand->var_after);
4062 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4063 rat = 1;
4065 else if (!constant_multiple_of (ustep, cstep, &rat))
4066 return false;
4068 if (prat)
4069 *prat = rat;
4071 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4072 type, we achieve better folding by computing their difference in this
4073 wider type, and casting the result to UUTYPE. We do not need to worry about
4074 overflows, as all the arithmetic will in the end be performed in UUTYPE
4075 anyway. */
4076 common_type = determine_common_wider_type (&ubase, &cbase);
4078 /* use = ubase - ratio * cbase + ratio * var. */
4079 tree_to_aff_combination (ubase, common_type, aff_inv);
4080 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4081 tree_to_aff_combination (var, uutype, aff_var);
4083 /* We need to shift the value if we are after the increment. */
4084 if (stmt_after_increment (loop, cand, at))
4086 aff_tree cstep_aff;
4088 if (common_type != uutype)
4089 cstep_common = fold_convert (common_type, cstep);
4090 else
4091 cstep_common = cstep;
4093 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4094 aff_combination_add (&aff_cbase, &cstep_aff);
4097 aff_combination_scale (&aff_cbase, -rat);
4098 aff_combination_add (aff_inv, &aff_cbase);
4099 if (common_type != uutype)
4100 aff_combination_convert (aff_inv, uutype);
4102 aff_combination_scale (aff_var, rat);
4103 return true;
4106 /* Determines the expression by which USE is expressed from induction variable
4107 CAND at statement AT in LOOP. The expression is stored in a decomposed
4108 form into AFF. Returns false if USE cannot be expressed using CAND. */
4110 static bool
4111 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4112 struct iv_cand *cand, class aff_tree *aff)
4114 aff_tree aff_var;
4116 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4117 return false;
4119 aff_combination_add (aff, &aff_var);
4120 return true;
4123 /* Return the type of USE. */
4125 static tree
4126 get_use_type (struct iv_use *use)
4128 tree base_type = TREE_TYPE (use->iv->base);
4129 tree type;
4131 if (use->type == USE_REF_ADDRESS)
4133 /* The base_type may be a void pointer. Create a pointer type based on
4134 the mem_ref instead. */
4135 type = build_pointer_type (TREE_TYPE (*use->op_p));
4136 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4137 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4139 else
4140 type = base_type;
4142 return type;
4145 /* Determines the expression by which USE is expressed from induction variable
4146 CAND at statement AT in LOOP. The computation is unshared. */
4148 static tree
4149 get_computation_at (class loop *loop, gimple *at,
4150 struct iv_use *use, struct iv_cand *cand)
4152 aff_tree aff;
4153 tree type = get_use_type (use);
4155 if (!get_computation_aff (loop, at, use, cand, &aff))
4156 return NULL_TREE;
4157 unshare_aff_combination (&aff);
4158 return fold_convert (type, aff_combination_to_tree (&aff));
4161 /* Like get_computation_at, but try harder, even if the computation
4162 is more expensive. Intended for debug stmts. */
4164 static tree
4165 get_debug_computation_at (class loop *loop, gimple *at,
4166 struct iv_use *use, struct iv_cand *cand)
4168 if (tree ret = get_computation_at (loop, at, use, cand))
4169 return ret;
4171 tree ubase = use->iv->base, ustep = use->iv->step;
4172 tree cbase = cand->iv->base, cstep = cand->iv->step;
4173 tree var;
4174 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4175 widest_int rat;
4177 /* We must have a precision to express the values of use. */
4178 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4179 return NULL_TREE;
4181 /* Try to handle the case that get_computation_at doesn't,
4182 try to express
4183 use = ubase + (var - cbase) / ratio. */
4184 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4185 &rat))
4186 return NULL_TREE;
4188 bool neg_p = false;
4189 if (wi::neg_p (rat))
4191 if (TYPE_UNSIGNED (ctype))
4192 return NULL_TREE;
4193 neg_p = true;
4194 rat = wi::neg (rat);
4197 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4198 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4199 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4200 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4201 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4202 if (!use->iv->no_overflow
4203 && !cand->iv->no_overflow
4204 && !integer_pow2p (cstep))
4205 return NULL_TREE;
4207 int bits = wi::exact_log2 (rat);
4208 if (bits == -1)
4209 bits = wi::floor_log2 (rat) + 1;
4210 if (!cand->iv->no_overflow
4211 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4212 return NULL_TREE;
4214 var = var_at_stmt (loop, cand, at);
4216 if (POINTER_TYPE_P (ctype))
4218 ctype = unsigned_type_for (ctype);
4219 cbase = fold_convert (ctype, cbase);
4220 cstep = fold_convert (ctype, cstep);
4221 var = fold_convert (ctype, var);
4224 if (stmt_after_increment (loop, cand, at))
4225 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4226 unshare_expr (cstep));
4228 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4229 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4230 wide_int_to_tree (TREE_TYPE (var), rat));
4231 if (POINTER_TYPE_P (utype))
4233 var = fold_convert (sizetype, var);
4234 if (neg_p)
4235 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4236 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4238 else
4240 var = fold_convert (utype, var);
4241 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4242 ubase, var);
4244 return var;
4247 /* Adjust the cost COST for being in loop setup rather than loop body.
4248 If we're optimizing for space, the loop setup overhead is constant;
4249 if we're optimizing for speed, amortize it over the per-iteration cost.
4250 If ROUND_UP_P is true, the result is rounded up rather than truncated
4251 toward zero when optimizing for speed. */
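/* For instance (illustrative numbers), a setup cost of 10 in a loop expected
   to run 4 times on average becomes 10 / 4 = 2, or (10 + 3) / 4 = 3 when
   ROUND_UP_P is true.  */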
4252 static int64_t
4253 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4254 bool round_up_p = false)
4256 if (cost == INFTY)
4257 return cost;
4258 else if (optimize_loop_for_speed_p (data->current_loop))
4260 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4261 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4263 else
4264 return cost;
4267 /* Calculate the speed or size cost (depending on SPEED) of shiftadd EXPR in
4268 MODE. MULT is the operand of EXPR holding the multiplication by a power of
4269 two, i.e. the shift. COST0 and COST1 are the costs for calculating the
4270 operands of EXPR. Returns true if successful, and returns the cost in COST. */
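/* For example, for EXPR == a + b * 8 the cost of a single shift-and-add insn
   (if the target has one) is compared against a shift followed by an add, and
   the cheaper alternative is returned.  */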
4272 static bool
4273 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4274 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4276 comp_cost res;
4277 tree op1 = TREE_OPERAND (expr, 1);
4278 tree cst = TREE_OPERAND (mult, 1);
4279 tree multop = TREE_OPERAND (mult, 0);
4280 int m = exact_log2 (int_cst_value (cst));
4281 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4282 int as_cost, sa_cost;
4283 bool mult_in_op1;
4285 if (!(m >= 0 && m < maxm))
4286 return false;
4288 STRIP_NOPS (op1);
4289 mult_in_op1 = operand_equal_p (op1, mult, 0);
4291 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4293 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4294 use that in preference to a shift insn followed by an add insn. */
4295 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4296 ? shiftadd_cost (speed, mode, m)
4297 : (mult_in_op1
4298 ? shiftsub1_cost (speed, mode, m)
4299 : shiftsub0_cost (speed, mode, m)));
4301 res = comp_cost (MIN (as_cost, sa_cost), 0);
4302 res += (mult_in_op1 ? cost0 : cost1);
4304 STRIP_NOPS (multop);
4305 if (!is_gimple_val (multop))
4306 res += force_expr_to_var_cost (multop, speed);
4308 *cost = res;
4309 return true;
4312 /* Estimates cost of forcing expression EXPR into a variable. */
4314 static comp_cost
4315 force_expr_to_var_cost (tree expr, bool speed)
4317 static bool costs_initialized = false;
4318 static unsigned integer_cost [2];
4319 static unsigned symbol_cost [2];
4320 static unsigned address_cost [2];
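/* These static cost arrays are indexed by the SPEED flag: element [0] holds
   the size costs, element [1] the speed costs.  */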
4321 tree op0, op1;
4322 comp_cost cost0, cost1, cost;
4323 machine_mode mode;
4324 scalar_int_mode int_mode;
4326 if (!costs_initialized)
4328 tree type = build_pointer_type (integer_type_node);
4329 tree var, addr;
4330 rtx x;
4331 int i;
4333 var = create_tmp_var_raw (integer_type_node, "test_var");
4334 TREE_STATIC (var) = 1;
4335 x = produce_memory_decl_rtl (var, NULL);
4336 SET_DECL_RTL (var, x);
4338 addr = build1 (ADDR_EXPR, type, var);
4341 for (i = 0; i < 2; i++)
4343 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4344 2000), i);
4346 symbol_cost[i] = computation_cost (addr, i) + 1;
4348 address_cost[i]
4349 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4353 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4354 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4355 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4356 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4357 fprintf (dump_file, "\n");
4361 costs_initialized = true;
4364 STRIP_NOPS (expr);
4366 if (SSA_VAR_P (expr))
4367 return no_cost;
4369 if (is_gimple_min_invariant (expr))
4371 if (poly_int_tree_p (expr))
4372 return comp_cost (integer_cost [speed], 0);
4374 if (TREE_CODE (expr) == ADDR_EXPR)
4376 tree obj = TREE_OPERAND (expr, 0);
4378 if (VAR_P (obj)
4379 || TREE_CODE (obj) == PARM_DECL
4380 || TREE_CODE (obj) == RESULT_DECL)
4381 return comp_cost (symbol_cost [speed], 0);
4384 return comp_cost (address_cost [speed], 0);
4387 switch (TREE_CODE (expr))
4389 case POINTER_PLUS_EXPR:
4390 case PLUS_EXPR:
4391 case MINUS_EXPR:
4392 case MULT_EXPR:
4393 case TRUNC_DIV_EXPR:
4394 case BIT_AND_EXPR:
4395 case BIT_IOR_EXPR:
4396 case LSHIFT_EXPR:
4397 case RSHIFT_EXPR:
4398 op0 = TREE_OPERAND (expr, 0);
4399 op1 = TREE_OPERAND (expr, 1);
4400 STRIP_NOPS (op0);
4401 STRIP_NOPS (op1);
4402 break;
4404 CASE_CONVERT:
4405 case NEGATE_EXPR:
4406 case BIT_NOT_EXPR:
4407 op0 = TREE_OPERAND (expr, 0);
4408 STRIP_NOPS (op0);
4409 op1 = NULL_TREE;
4410 break;
4411 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4412 introduce a COND_EXPR for the IV base, so better cost estimation is needed
4413 for this COND_EXPR and for the tcc_comparison codes. */
4414 case COND_EXPR:
4415 op0 = TREE_OPERAND (expr, 1);
4416 STRIP_NOPS (op0);
4417 op1 = TREE_OPERAND (expr, 2);
4418 STRIP_NOPS (op1);
4419 break;
4420 case LT_EXPR:
4421 case LE_EXPR:
4422 case GT_EXPR:
4423 case GE_EXPR:
4424 case EQ_EXPR:
4425 case NE_EXPR:
4426 case UNORDERED_EXPR:
4427 case ORDERED_EXPR:
4428 case UNLT_EXPR:
4429 case UNLE_EXPR:
4430 case UNGT_EXPR:
4431 case UNGE_EXPR:
4432 case UNEQ_EXPR:
4433 case LTGT_EXPR:
4434 case MAX_EXPR:
4435 case MIN_EXPR:
4436 op0 = TREE_OPERAND (expr, 0);
4437 STRIP_NOPS (op0);
4438 op1 = TREE_OPERAND (expr, 1);
4439 STRIP_NOPS (op1);
4440 break;
4442 default:
4443 /* Just an arbitrary value, FIXME. */
4444 return comp_cost (target_spill_cost[speed], 0);
4447 if (op0 == NULL_TREE
4448 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4449 cost0 = no_cost;
4450 else
4451 cost0 = force_expr_to_var_cost (op0, speed);
4453 if (op1 == NULL_TREE
4454 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4455 cost1 = no_cost;
4456 else
4457 cost1 = force_expr_to_var_cost (op1, speed);
4459 mode = TYPE_MODE (TREE_TYPE (expr));
4460 switch (TREE_CODE (expr))
4462 case POINTER_PLUS_EXPR:
4463 case PLUS_EXPR:
4464 case MINUS_EXPR:
4465 case NEGATE_EXPR:
4466 cost = comp_cost (add_cost (speed, mode), 0);
4467 if (TREE_CODE (expr) != NEGATE_EXPR)
4469 tree mult = NULL_TREE;
4470 comp_cost sa_cost;
4471 if (TREE_CODE (op1) == MULT_EXPR)
4472 mult = op1;
4473 else if (TREE_CODE (op0) == MULT_EXPR)
4474 mult = op0;
4476 if (mult != NULL_TREE
4477 && is_a <scalar_int_mode> (mode, &int_mode)
4478 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4479 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4480 speed, &sa_cost))
4481 return sa_cost;
4483 break;
4485 CASE_CONVERT:
4487 tree inner_mode, outer_mode;
4488 outer_mode = TREE_TYPE (expr);
4489 inner_mode = TREE_TYPE (op0);
4490 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4491 TYPE_MODE (inner_mode), speed), 0);
4493 break;
4495 case MULT_EXPR:
4496 if (cst_and_fits_in_hwi (op0))
4497 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4498 mode, speed), 0);
4499 else if (cst_and_fits_in_hwi (op1))
4500 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4501 mode, speed), 0);
4502 else
4503 return comp_cost (target_spill_cost [speed], 0);
4504 break;
4506 case TRUNC_DIV_EXPR:
4507 /* Division by power of two is usually cheap, so we allow it. Forbid
4508 anything else. */
4509 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4510 cost = comp_cost (add_cost (speed, mode), 0);
4511 else
4512 cost = comp_cost (target_spill_cost[speed], 0);
4513 break;
4515 case BIT_AND_EXPR:
4516 case BIT_IOR_EXPR:
4517 case BIT_NOT_EXPR:
4518 case LSHIFT_EXPR:
4519 case RSHIFT_EXPR:
4520 cost = comp_cost (add_cost (speed, mode), 0);
4521 break;
4522 case COND_EXPR:
4523 op0 = TREE_OPERAND (expr, 0);
4524 STRIP_NOPS (op0);
4525 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4526 || CONSTANT_CLASS_P (op0))
4527 cost = no_cost;
4528 else
4529 cost = force_expr_to_var_cost (op0, speed);
4530 break;
4531 case LT_EXPR:
4532 case LE_EXPR:
4533 case GT_EXPR:
4534 case GE_EXPR:
4535 case EQ_EXPR:
4536 case NE_EXPR:
4537 case UNORDERED_EXPR:
4538 case ORDERED_EXPR:
4539 case UNLT_EXPR:
4540 case UNLE_EXPR:
4541 case UNGT_EXPR:
4542 case UNGE_EXPR:
4543 case UNEQ_EXPR:
4544 case LTGT_EXPR:
4545 case MAX_EXPR:
4546 case MIN_EXPR:
4547 /* Simply use the add cost for now; FIXME: switch to a more accurate cost
4548 evaluation if one becomes available. */
4549 cost = comp_cost (add_cost (speed, mode), 0);
4550 break;
4552 default:
4553 gcc_unreachable ();
4556 cost += cost0;
4557 cost += cost1;
4558 return cost;
4561 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4562 invariants the computation depends on. */
4564 static comp_cost
4565 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4567 if (!expr)
4568 return no_cost;
4570 find_inv_vars (data, &expr, inv_vars);
4571 return force_expr_to_var_cost (expr, data->speed);
4574 /* Returns cost of auto-modifying address expression in shape base + offset.
4575 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4576 address expression. The address expression has ADDR_MODE in addr space
4577 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4578 speed or size. */
4580 enum ainc_type
4582 AINC_PRE_INC, /* Pre increment. */
4583 AINC_PRE_DEC, /* Pre decrement. */
4584 AINC_POST_INC, /* Post increment. */
4585 AINC_POST_DEC, /* Post decrement. */
4586 AINC_NONE /* Also the number of auto increment types. */
4589 struct ainc_cost_data
4591 int64_t costs[AINC_NONE];
4594 static comp_cost
4595 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4596 machine_mode addr_mode, machine_mode mem_mode,
4597 addr_space_t as, bool speed)
4599 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4600 && !USE_STORE_PRE_DECREMENT (mem_mode)
4601 && !USE_LOAD_POST_DECREMENT (mem_mode)
4602 && !USE_STORE_POST_DECREMENT (mem_mode)
4603 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4604 && !USE_STORE_PRE_INCREMENT (mem_mode)
4605 && !USE_LOAD_POST_INCREMENT (mem_mode)
4606 && !USE_STORE_POST_INCREMENT (mem_mode))
4607 return infinite_cost;
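/* Costs are cached per (address space, memory mode) pair; the cache vector
   below is grown lazily.  */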
4609 static vec<ainc_cost_data *> ainc_cost_data_list;
4610 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4611 if (idx >= ainc_cost_data_list.length ())
4613 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4615 gcc_assert (nsize > idx);
4616 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4619 ainc_cost_data *data = ainc_cost_data_list[idx];
4620 if (data == NULL)
4622 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4624 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4625 data->costs[AINC_PRE_DEC] = INFTY;
4626 data->costs[AINC_POST_DEC] = INFTY;
4627 data->costs[AINC_PRE_INC] = INFTY;
4628 data->costs[AINC_POST_INC] = INFTY;
4629 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4630 || USE_STORE_PRE_DECREMENT (mem_mode))
4632 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4634 if (memory_address_addr_space_p (mem_mode, addr, as))
4635 data->costs[AINC_PRE_DEC]
4636 = address_cost (addr, mem_mode, as, speed);
4638 if (USE_LOAD_POST_DECREMENT (mem_mode)
4639 || USE_STORE_POST_DECREMENT (mem_mode))
4641 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4643 if (memory_address_addr_space_p (mem_mode, addr, as))
4644 data->costs[AINC_POST_DEC]
4645 = address_cost (addr, mem_mode, as, speed);
4647 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4648 || USE_STORE_PRE_INCREMENT (mem_mode))
4650 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4652 if (memory_address_addr_space_p (mem_mode, addr, as))
4653 data->costs[AINC_PRE_INC]
4654 = address_cost (addr, mem_mode, as, speed);
4656 if (USE_LOAD_POST_INCREMENT (mem_mode)
4657 || USE_STORE_POST_INCREMENT (mem_mode))
4659 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4661 if (memory_address_addr_space_p (mem_mode, addr, as))
4662 data->costs[AINC_POST_INC]
4663 = address_cost (addr, mem_mode, as, speed);
4665 ainc_cost_data_list[idx] = data;
4668 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4669 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4670 return comp_cost (data->costs[AINC_POST_INC], 0);
4671 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4672 return comp_cost (data->costs[AINC_POST_DEC], 0);
4673 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4674 return comp_cost (data->costs[AINC_PRE_INC], 0);
4675 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4676 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4678 return infinite_cost;
4681 /* Return cost of computing USE's address expression by using CAND.
4682 AFF_INV and AFF_VAR represent invariant and variant parts of the
4683 address expression, respectively. If AFF_INV is simple, store
4684 the loop invariant variables it depends on in INV_VARS;
4685 if AFF_INV is complicated, handle it as a new invariant expression
4686 and record it in INV_EXPR. RATIO is the ratio of USE's step to
4687 CAND's step. If CAN_AUTOINC is non-NULL, store in it a boolean
4688 value indicating whether this is an auto-increment address. */
4690 static comp_cost
4691 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4692 struct iv_cand *cand, aff_tree *aff_inv,
4693 aff_tree *aff_var, HOST_WIDE_INT ratio,
4694 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4695 bool *can_autoinc, bool speed)
4697 rtx addr;
4698 bool simple_inv = true;
4699 tree comp_inv = NULL_TREE, type = aff_var->type;
4700 comp_cost var_cost = no_cost, cost = no_cost;
4701 struct mem_address parts = {NULL_TREE, integer_one_node,
4702 NULL_TREE, NULL_TREE, NULL_TREE};
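/* PARTS starts out as the bare "base" form; integer_one_node merely stands in
   for an arbitrary base register while probing which addressing forms are
   valid.  */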
4703 machine_mode addr_mode = TYPE_MODE (type);
4704 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4705 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4706 /* Only true if ratio != 1. */
4707 bool ok_with_ratio_p = false;
4708 bool ok_without_ratio_p = false;
4709 code_helper code = ERROR_MARK;
4711 if (use->type == USE_PTR_ADDRESS)
4713 gcall *call = as_a<gcall *> (use->stmt);
4714 gcc_assert (gimple_call_internal_p (call));
4715 code = gimple_call_internal_fn (call);
4718 if (!aff_combination_const_p (aff_inv))
4720 parts.index = integer_one_node;
4721 /* Addressing mode "base + index". */
4722 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4723 if (ratio != 1)
4725 parts.step = wide_int_to_tree (type, ratio);
4726 /* Addressing mode "base + index << scale". */
4727 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4728 if (!ok_with_ratio_p)
4729 parts.step = NULL_TREE;
4731 if (ok_with_ratio_p || ok_without_ratio_p)
4733 if (maybe_ne (aff_inv->offset, 0))
4735 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4736 /* Addressing mode "base + index [<< scale] + offset". */
4737 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4738 parts.offset = NULL_TREE;
4739 else
4740 aff_inv->offset = 0;
4743 move_fixed_address_to_symbol (&parts, aff_inv);
4744 /* Base is fixed address and is moved to symbol part. */
4745 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4746 parts.base = NULL_TREE;
4748 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4749 if (parts.symbol != NULL_TREE
4750 && !valid_mem_ref_p (mem_mode, as, &parts, code))
4752 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4753 parts.symbol = NULL_TREE;
4754 /* Reset SIMPLE_INV since symbol address needs to be computed
4755 outside of address expression in this case. */
4756 simple_inv = false;
4757 /* The symbol part is moved back to the base part; the base can't be NULL. */
4758 parts.base = integer_one_node;
4761 else
4762 parts.index = NULL_TREE;
4764 else
4766 poly_int64 ainc_step;
4767 if (can_autoinc
4768 && ratio == 1
4769 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4771 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4773 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4774 ainc_offset += ainc_step;
4775 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4776 addr_mode, mem_mode, as, speed);
4777 if (!cost.infinite_cost_p ())
4779 *can_autoinc = true;
4780 return cost;
4782 cost = no_cost;
4784 if (!aff_combination_zero_p (aff_inv))
4786 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4787 /* Addressing mode "base + offset". */
4788 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4789 parts.offset = NULL_TREE;
4790 else
4791 aff_inv->offset = 0;
4795 if (simple_inv)
4796 simple_inv = (aff_inv == NULL
4797 || aff_combination_const_p (aff_inv)
4798 || aff_combination_singleton_var_p (aff_inv));
4799 if (!aff_combination_zero_p (aff_inv))
4800 comp_inv = aff_combination_to_tree (aff_inv);
4801 if (comp_inv != NULL_TREE)
4802 cost = force_var_cost (data, comp_inv, inv_vars);
4803 if (ratio != 1 && parts.step == NULL_TREE)
4804 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4805 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4806 var_cost += add_cost (speed, addr_mode);
4808 if (comp_inv && inv_expr && !simple_inv)
4810 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4811 /* The invariant expression subsumes the dependences, so clear INV_VARS. */
4812 if (*inv_expr != NULL && inv_vars && *inv_vars)
4813 bitmap_clear (*inv_vars);
4815 /* The cost of a small invariant expression, once adjusted against the loop
4816 niters, is usually zero, which makes it hard to differentiate from a
4817 candidate based on loop invariant variables. Secondly, the generated
4818 invariant expression may not be hoisted out of the loop by a following
4819 pass. We penalize the cost by rounding up in order to neutralize such
4820 effects. */
4821 cost.cost = adjust_setup_cost (data, cost.cost, true);
4822 cost.scratch = cost.cost;
4825 cost += var_cost;
4826 addr = addr_for_mem_ref (&parts, as, false);
4827 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4828 cost += address_cost (addr, mem_mode, as, speed);
4830 if (parts.symbol != NULL_TREE)
4831 cost.complexity += 1;
4832 /* Don't increase the complexity of adding a scaled index if it's
4833 the only kind of index that the target allows. */
4834 if (parts.step != NULL_TREE && ok_without_ratio_p)
4835 cost.complexity += 1;
4836 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4837 cost.complexity += 1;
4838 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4839 cost.complexity += 1;
4841 return cost;
4844 /* Scale (multiply) the computed COST (except the scratch part that should be
4845 hoisted out of the loop) by header->frequency / AT->frequency, which makes
4846 the expected cost more accurate. */
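/* I.e. the result is scratch + (cost - scratch) * scale_factor, with the
   per-BB scale_factor precomputed and stashed in bb->aux.  */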
4848 static comp_cost
4849 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4851 if (data->speed
4852 && data->current_loop->header->count.to_frequency (cfun) > 0)
4854 basic_block bb = gimple_bb (at);
4855 gcc_assert (cost.scratch <= cost.cost);
4856 int scale_factor = (int)(intptr_t) bb->aux;
4857 if (scale_factor == 1)
4858 return cost;
4860 int64_t scaled_cost
4861 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4863 if (dump_file && (dump_flags & TDF_DETAILS))
4864 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4865 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4866 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4868 cost.cost = scaled_cost;
4871 return cost;
4874 /* Determines the cost of the computation by which USE is expressed
4875 from induction variable CAND. If ADDRESS_P is true, we just need
4876 to create an address from it, otherwise we want to get it into a
4877 register. A set of invariants we depend on is stored in INV_VARS.
4878 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4879 addressing is likely. If INV_EXPR is nonnull, record invariant
4880 expr entry in it. */
4882 static comp_cost
4883 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4884 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4885 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4887 gimple *at = use->stmt;
4888 tree ubase = use->iv->base, cbase = cand->iv->base;
4889 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4890 tree comp_inv = NULL_TREE;
4891 HOST_WIDE_INT ratio, aratio;
4892 comp_cost cost;
4893 widest_int rat;
4894 aff_tree aff_inv, aff_var;
4895 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4897 if (inv_vars)
4898 *inv_vars = NULL;
4899 if (can_autoinc)
4900 *can_autoinc = false;
4901 if (inv_expr)
4902 *inv_expr = NULL;
4904 /* Check if we have enough precision to express the values of use. */
4905 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4906 return infinite_cost;
4908 if (address_p
4909 || (use->iv->base_object
4910 && cand->iv->base_object
4911 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4912 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4914 /* Do not try to express the address of one object with a computation based
4915 on the address of a different object. This may cause problems in RTL-level
4916 alias analysis (which does not expect this to happen, as it is invalid
4917 in C), and would be unlikely to be useful anyway. */
4919 if (use->iv->base_object
4920 && cand->iv->base_object
4921 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4922 return infinite_cost;
4925 if (!get_computation_aff_1 (data->current_loop, at, use,
4926 cand, &aff_inv, &aff_var, &rat)
4927 || !wi::fits_shwi_p (rat))
4928 return infinite_cost;
4930 ratio = rat.to_shwi ();
4931 if (address_p)
4933 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4934 inv_vars, inv_expr, can_autoinc, speed);
4935 cost = get_scaled_computation_cost_at (data, at, cost);
4936 /* For doloop IV cand, add on the extra cost. */
4937 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4938 return cost;
4941 bool simple_inv = (aff_combination_const_p (&aff_inv)
4942 || aff_combination_singleton_var_p (&aff_inv));
4943 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4944 aff_combination_convert (&aff_inv, signed_type);
4945 if (!aff_combination_zero_p (&aff_inv))
4946 comp_inv = aff_combination_to_tree (&aff_inv);
4948 cost = force_var_cost (data, comp_inv, inv_vars);
4949 if (comp_inv && inv_expr && !simple_inv)
4951 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4952 /* The invariant expression subsumes the dependences, so clear INV_VARS. */
4953 if (*inv_expr != NULL && inv_vars && *inv_vars)
4954 bitmap_clear (*inv_vars);
4956 cost.cost = adjust_setup_cost (data, cost.cost);
4957 /* Record setup cost in scratch field. */
4958 cost.scratch = cost.cost;
4960 /* The cost of a constant integer can be covered when adding the invariant
4961 part to the variant part. */
4962 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4963 cost = no_cost;
4965 /* Need type narrowing to represent use with cand. */
4966 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4968 machine_mode outer_mode = TYPE_MODE (utype);
4969 machine_mode inner_mode = TYPE_MODE (ctype);
4970 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4973 /* Turn a + i * (-c) into a - i * c. */
4974 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4975 aratio = -ratio;
4976 else
4977 aratio = ratio;
4979 if (ratio != 1)
4980 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4982 /* TODO: We may also need to check if we can compute a + i * 4 in one
4983 instruction. */
4984 /* Need to add up the invariant and variant parts. */
4985 if (comp_inv && !integer_zerop (comp_inv))
4986 cost += add_cost (speed, TYPE_MODE (utype));
4988 cost = get_scaled_computation_cost_at (data, at, cost);
4990 /* For doloop IV cand, add on the extra cost. */
4991 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4992 cost += targetm.doloop_cost_for_generic;
4994 return cost;
4997 /* Determines cost of computing the use in GROUP with CAND in a generic
4998 expression. */
5000 static bool
5001 determine_group_iv_cost_generic (struct ivopts_data *data,
5002 struct iv_group *group, struct iv_cand *cand)
5004 comp_cost cost;
5005 iv_inv_expr_ent *inv_expr = NULL;
5006 bitmap inv_vars = NULL, inv_exprs = NULL;
5007 struct iv_use *use = group->vuses[0];
5009 /* The simple case first -- if we need to express the value of the preserved
5010 original biv, the cost is 0. This also prevents us from counting the
5011 cost of increment twice -- once at this use and once in the cost of
5012 the candidate. */
5013 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5014 cost = no_cost;
5015 /* If the IV candidate involves undefined SSA values and is not the
5016 same IV as on the USE avoid using that candidate here. */
5017 else if (cand->involves_undefs
5018 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
5019 return false;
5020 else
5021 cost = get_computation_cost (data, use, cand, false,
5022 &inv_vars, NULL, &inv_expr);
5024 if (inv_expr)
5026 inv_exprs = BITMAP_ALLOC (NULL);
5027 bitmap_set_bit (inv_exprs, inv_expr->id);
5029 set_group_iv_cost (data, group, cand, cost, inv_vars,
5030 NULL_TREE, ERROR_MARK, inv_exprs);
5031 return !cost.infinite_cost_p ();
5034 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5036 static bool
5037 determine_group_iv_cost_address (struct ivopts_data *data,
5038 struct iv_group *group, struct iv_cand *cand)
5040 unsigned i;
5041 bitmap inv_vars = NULL, inv_exprs = NULL;
5042 bool can_autoinc;
5043 iv_inv_expr_ent *inv_expr = NULL;
5044 struct iv_use *use = group->vuses[0];
5045 comp_cost sum_cost = no_cost, cost;
5047 cost = get_computation_cost (data, use, cand, true,
5048 &inv_vars, &can_autoinc, &inv_expr);
5050 if (inv_expr)
5052 inv_exprs = BITMAP_ALLOC (NULL);
5053 bitmap_set_bit (inv_exprs, inv_expr->id);
5055 sum_cost = cost;
5056 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5058 if (can_autoinc)
5059 sum_cost -= cand->cost_step;
5060 /* If we generated the candidate solely for exploiting autoincrement
5061 opportunities, and it turns out it can't be used, set the cost to
5062 infinity to make sure we ignore it. */
5063 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5064 sum_cost = infinite_cost;
5067 /* Uses in a group can share setup code, so only add setup cost once. */
5068 cost -= cost.scratch;
5069 /* Compute and add costs for the rest of the uses in this group. */
5070 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5072 struct iv_use *next = group->vuses[i];
5074 /* TODO: We could skip computing cost for sub iv_use when it has the
5075 same cost as the first iv_use, but the cost really depends on the
5076 offset and where the iv_use is. */
5077 cost = get_computation_cost (data, next, cand, true,
5078 NULL, &can_autoinc, &inv_expr);
5079 if (inv_expr)
5081 if (!inv_exprs)
5082 inv_exprs = BITMAP_ALLOC (NULL);
5084 bitmap_set_bit (inv_exprs, inv_expr->id);
5086 sum_cost += cost;
5088 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5089 NULL_TREE, ERROR_MARK, inv_exprs);
5091 return !sum_cost.infinite_cost_p ();
5094 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5095 and stores it to VAL. */
5097 static void
5098 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5099 class tree_niter_desc *desc, aff_tree *val)
5101 aff_tree step, delta, nit;
5102 struct iv *iv = cand->iv;
5103 tree type = TREE_TYPE (iv->base);
5104 tree niter = desc->niter;
5105 bool after_adjust = stmt_after_increment (loop, cand, at);
5106 tree steptype;
5108 if (POINTER_TYPE_P (type))
5109 steptype = sizetype;
5110 else
5111 steptype = unsigned_type_for (type);
5113 /* If AFTER_ADJUST is required, the code below generates the equivalent
5114 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5115 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5116 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5117 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5118 class for common idioms that we know are safe. */
5119 if (after_adjust
5120 && desc->control.no_overflow
5121 && integer_onep (desc->control.step)
5122 && (desc->cmp == LT_EXPR
5123 || desc->cmp == NE_EXPR)
5124 && TREE_CODE (desc->bound) == SSA_NAME)
5126 if (integer_onep (desc->control.base))
5128 niter = desc->bound;
5129 after_adjust = false;
5131 else if (TREE_CODE (niter) == MINUS_EXPR
5132 && integer_onep (TREE_OPERAND (niter, 1)))
5134 niter = TREE_OPERAND (niter, 0);
5135 after_adjust = false;
5139 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5140 aff_combination_convert (&step, steptype);
5141 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5142 aff_combination_convert (&nit, steptype);
5143 aff_combination_mult (&nit, &step, &delta);
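/* DELTA = NITER * STEP; one more STEP is added below when the value is taken
   after the increment.  */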
5144 if (after_adjust)
5145 aff_combination_add (&delta, &step);
5147 tree_to_aff_combination (iv->base, type, val);
5148 if (!POINTER_TYPE_P (type))
5149 aff_combination_convert (val, steptype);
5150 aff_combination_add (val, &delta);
5153 /* Returns period of induction variable iv. */
5155 static tree
5156 iv_period (struct iv *iv)
5158 tree step = iv->step, period, type;
5159 tree pow2div;
5161 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5163 type = unsigned_type_for (TREE_TYPE (step));
5164 /* Period of the iv is lcm (step, type_range)/step - 1,
5165 i.e., N*type_range/step - 1. Since type_range is a power
5166 of two, N == (step >> num_of_ending_zeros_binary (step)),
5167 so the final result is
5169 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
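/* For instance, for a 32-bit unsigned iv with step 12 (binary 1100), pow2div
   is 2 and the period is (1 << 30) - 1.  */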
5172 pow2div = num_ending_zeros (step);
5174 period = build_low_bits_mask (type,
5175 (TYPE_PRECISION (type)
5176 - tree_to_uhwi (pow2div)));
5178 return period;
5181 /* Returns the comparison operator used when eliminating the iv USE. */
5183 static enum tree_code
5184 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5186 class loop *loop = data->current_loop;
5187 basic_block ex_bb;
5188 edge exit;
5190 ex_bb = gimple_bb (use->stmt);
5191 exit = EDGE_SUCC (ex_bb, 0);
5192 if (flow_bb_inside_loop_p (loop, exit->dest))
5193 exit = EDGE_SUCC (ex_bb, 1);
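/* EXIT is now the edge leaving the loop; if it is the true edge, the rewritten
   condition must be true exactly when the loop exits, hence EQ_EXPR, otherwise
   NE_EXPR.  */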
5195 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5198 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5199 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5200 calculation is performed in non-wrapping type.
5202 TODO: More generally, we could test for the situation that
5203 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5204 This would require knowing the sign of OFFSET. */
5206 static bool
5207 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5209 enum tree_code code;
5210 tree e1, e2;
5211 aff_tree aff_e1, aff_e2, aff_offset;
5213 if (!nowrap_type_p (TREE_TYPE (base)))
5214 return false;
5216 base = expand_simple_operations (base);
5218 if (TREE_CODE (base) == SSA_NAME)
5220 gimple *stmt = SSA_NAME_DEF_STMT (base);
5222 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5223 return false;
5225 code = gimple_assign_rhs_code (stmt);
5226 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5227 return false;
5229 e1 = gimple_assign_rhs1 (stmt);
5230 e2 = gimple_assign_rhs2 (stmt);
5232 else
5234 code = TREE_CODE (base);
5235 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5236 return false;
5237 e1 = TREE_OPERAND (base, 0);
5238 e2 = TREE_OPERAND (base, 1);
5241 /* Use affine expansion as deeper inspection to prove the equality. */
5242 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5243 &aff_e2, &data->name_expansion_cache);
5244 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5245 &aff_offset, &data->name_expansion_cache);
5246 aff_combination_scale (&aff_offset, -1);
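/* With -OFFSET mixed in, a combination that cancels to zero below means the
   corresponding operand equals OFFSET.  */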
5247 switch (code)
5249 case PLUS_EXPR:
5250 aff_combination_add (&aff_e2, &aff_offset);
5251 if (aff_combination_zero_p (&aff_e2))
5252 return true;
5254 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5255 &aff_e1, &data->name_expansion_cache);
5256 aff_combination_add (&aff_e1, &aff_offset);
5257 return aff_combination_zero_p (&aff_e1);
5259 case POINTER_PLUS_EXPR:
5260 aff_combination_add (&aff_e2, &aff_offset);
5261 return aff_combination_zero_p (&aff_e2);
5263 default:
5264 return false;
5268 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5269 comparison with CAND. NITER describes the number of iterations of
5270 the loops. If successful, the comparison in COMP_P is altered accordingly.
5272 We aim to handle the following situation:
5274 sometype *base, *p;
5275 int a, b, i;
5277 i = a;
5278 p = p_0 = base + a;
5280 do
5281 {
5282 bla (*p);
5283 p++;
5284 i++;
5285 }
5286 while (i < b);
5288 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5289 We aim to optimize this to
5291 p = p_0 = base + a;
5292 do
5293 {
5294 bla (*p);
5295 p++;
5296 }
5297 while (p < p_0 - a + b);
5299 This preserves the correctness, since the pointer arithmetics does not
5300 overflow. More precisely:
5302 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5303 overflow in computing it or the values of p.
5304 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5305 overflow. To prove this, we use the fact that p_0 = base + a. */
5307 static bool
5308 iv_elimination_compare_lt (struct ivopts_data *data,
5309 struct iv_cand *cand, enum tree_code *comp_p,
5310 class tree_niter_desc *niter)
5312 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5313 class aff_tree nit, tmpa, tmpb;
5314 enum tree_code comp;
5315 HOST_WIDE_INT step;
5317 /* We need to know that the candidate induction variable does not overflow.
5318 While more complex analysis may be used to prove this, for now just
5319 check that the variable appears in the original program and that it
5320 is computed in a type that guarantees no overflows. */
5321 cand_type = TREE_TYPE (cand->iv->base);
5322 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5323 return false;
5325 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5326 the calculation of the BOUND could overflow, making the comparison
5327 invalid. */
5328 if (!data->loop_single_exit_p)
5329 return false;
5331 /* We need to be able to decide whether candidate is increasing or decreasing
5332 in order to choose the right comparison operator. */
5333 if (!cst_and_fits_in_hwi (cand->iv->step))
5334 return false;
5335 step = int_cst_value (cand->iv->step);
5337 /* Check that the number of iterations matches the expected pattern:
5338 a + 1 > b ? 0 : b - a - 1. */
5339 mbz = niter->may_be_zero;
5340 if (TREE_CODE (mbz) == GT_EXPR)
5342 /* Handle a + 1 > b. */
5343 tree op0 = TREE_OPERAND (mbz, 0);
5344 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5346 a = TREE_OPERAND (op0, 0);
5347 b = TREE_OPERAND (mbz, 1);
5349 else
5350 return false;
5352 else if (TREE_CODE (mbz) == LT_EXPR)
5354 tree op1 = TREE_OPERAND (mbz, 1);
5356 /* Handle b < a + 1. */
5357 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5359 a = TREE_OPERAND (op1, 0);
5360 b = TREE_OPERAND (mbz, 0);
5362 else
5363 return false;
5365 else
5366 return false;
5368 /* Expected number of iterations is B - A - 1. Check that it matches
5369 the actual number, i.e., that B - A - NITER = 1. */
5370 tree_to_aff_combination (niter->niter, nit_type, &nit);
5371 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5372 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5373 aff_combination_scale (&nit, -1);
5374 aff_combination_scale (&tmpa, -1);
5375 aff_combination_add (&tmpb, &tmpa);
5376 aff_combination_add (&tmpb, &nit);
5377 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5378 return false;
5380 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5381 overflow. */
5382 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5383 cand->iv->step,
5384 fold_convert (TREE_TYPE (cand->iv->step), a));
5385 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5386 return false;
5388 /* Determine the new comparison operator. */
5389 comp = step < 0 ? GT_EXPR : LT_EXPR;
5390 if (*comp_p == NE_EXPR)
5391 *comp_p = comp;
5392 else if (*comp_p == EQ_EXPR)
5393 *comp_p = invert_tree_comparison (comp, false);
5394 else
5395 gcc_unreachable ();
5397 return true;
5400 /* Check whether it is possible to express the condition in USE by comparison
5401 of candidate CAND. If so, store the value compared with to BOUND, and the
5402 comparison operator to COMP. */
5404 static bool
5405 may_eliminate_iv (struct ivopts_data *data,
5406 struct iv_use *use, struct iv_cand *cand, tree *bound,
5407 enum tree_code *comp)
5409 basic_block ex_bb;
5410 edge exit;
5411 tree period;
5412 class loop *loop = data->current_loop;
5413 aff_tree bnd;
5414 class tree_niter_desc *desc = NULL;
5416 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5417 return false;
5419 /* For now works only for exits that dominate the loop latch.
5420 TODO: extend to other conditions inside loop body. */
5421 ex_bb = gimple_bb (use->stmt);
5422 if (use->stmt != last_nondebug_stmt (ex_bb)
5423 || gimple_code (use->stmt) != GIMPLE_COND
5424 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5425 return false;
5427 exit = EDGE_SUCC (ex_bb, 0);
5428 if (flow_bb_inside_loop_p (loop, exit->dest))
5429 exit = EDGE_SUCC (ex_bb, 1);
5430 if (flow_bb_inside_loop_p (loop, exit->dest))
5431 return false;
5433 desc = niter_for_exit (data, exit);
5434 if (!desc)
5435 return false;
5437 /* Determine whether we can use the variable to test the exit condition.
5438 This is the case iff the period of the induction variable is greater
5439 than the number of iterations for which the exit condition is true. */
5440 period = iv_period (cand->iv);
5442 /* If the number of iterations is constant, compare against it directly. */
5443 if (TREE_CODE (desc->niter) == INTEGER_CST)
5445 /* See cand_value_at. */
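/* After the increment the candidate has advanced one extra step at NITER, so
   the number of iterations must be strictly smaller than the period.  */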
5446 if (stmt_after_increment (loop, cand, use->stmt))
5448 if (!tree_int_cst_lt (desc->niter, period))
5449 return false;
5451 else
5453 if (tree_int_cst_lt (period, desc->niter))
5454 return false;
5458 /* If not, and if this is the only possible exit of the loop, see whether
5459 we can get a conservative estimate on the number of iterations of the
5460 entire loop and compare against that instead. */
5461 else
5463 widest_int period_value, max_niter;
5465 max_niter = desc->max;
5466 if (stmt_after_increment (loop, cand, use->stmt))
5467 max_niter += 1;
5468 period_value = wi::to_widest (period);
5469 if (wi::gtu_p (max_niter, period_value))
5471 /* See if we can take advantage of inferred loop bound
5472 information. */
5473 if (data->loop_single_exit_p)
5475 if (!max_loop_iterations (loop, &max_niter))
5476 return false;
5477 /* The loop bound is already adjusted by adding 1. */
5478 if (wi::gtu_p (max_niter, period_value))
5479 return false;
5481 else
5482 return false;
5486 /* For a doloop IV cand, the bound would be zero. It's safe whether
5487 may_be_zero is set or not. */
5488 if (cand->doloop_p)
5490 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5491 *comp = iv_elimination_compare (data, use);
5492 return true;
5495 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5497 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5498 aff_combination_to_tree (&bnd));
5499 *comp = iv_elimination_compare (data, use);
5501 /* It is unlikely that computing the number of iterations using division
5502 would be more profitable than keeping the original induction variable. */
5503 if (expression_expensive_p (*bound))
5504 return false;
5506 /* Sometimes it is possible to handle the situation that the number of
5507 iterations may be zero (unless additional assumptions hold) by using <
5508 instead of != in the exit condition.
5510 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5511 base the exit condition on it. However, that is often too
5512 expensive. */
5513 if (!integer_zerop (desc->may_be_zero))
5514 return iv_elimination_compare_lt (data, cand, comp, desc);
5516 return true;
5519 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5520 be copied, if it is used in the loop body and DATA->body_includes_call. */
5522 static int
5523 parm_decl_cost (struct ivopts_data *data, tree bound)
5525 tree sbound = bound;
5526 STRIP_NOPS (sbound);
5528 if (TREE_CODE (sbound) == SSA_NAME
5529 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5530 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5531 && data->body_includes_call)
5532 return COSTS_N_INSNS (1);
5534 return 0;
5537 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5539 static bool
5540 determine_group_iv_cost_cond (struct ivopts_data *data,
5541 struct iv_group *group, struct iv_cand *cand)
5543 tree bound = NULL_TREE;
5544 struct iv *cmp_iv;
5545 bitmap inv_exprs = NULL;
5546 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5547 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5548 enum comp_iv_rewrite rewrite_type;
5549 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5550 tree *control_var, *bound_cst;
5551 enum tree_code comp = ERROR_MARK;
5552 struct iv_use *use = group->vuses[0];
5554 /* Extract condition operands. */
5555 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5556 &bound_cst, NULL, &cmp_iv);
5557 gcc_assert (rewrite_type != COMP_IV_NA);
5559 /* Try iv elimination. */
5560 if (rewrite_type == COMP_IV_ELIM
5561 && may_eliminate_iv (data, use, cand, &bound, &comp))
5563 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5564 if (elim_cost.cost == 0)
5565 elim_cost.cost = parm_decl_cost (data, bound);
5566 else if (TREE_CODE (bound) == INTEGER_CST)
5567 elim_cost.cost = 0;
5568 /* If we replace a loop condition 'i < n' with 'p < base + n',
5569 inv_vars_elim will have 'base' and 'n' set, which implies that both
5570 'base' and 'n' will be live during the loop. More likely,
5571 'base + n' will be loop invariant, resulting in only one live value
5572 during the loop. So in that case we clear inv_vars_elim and set
5573 inv_expr_elim instead. */
5574 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5576 inv_expr_elim = get_loop_invariant_expr (data, bound);
5577 bitmap_clear (inv_vars_elim);
5579 /* The bound is a loop invariant, so it will be only computed
5580 once. */
5581 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5584 /* When the condition is a comparison of the candidate IV against
5585 zero, prefer this IV.
5587 TODO: The constant that we're subtracting from the cost should
5588 be target-dependent. This information should be added to the
5589 target costs for each backend. */
5590 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5591 && integer_zerop (*bound_cst)
5592 && (operand_equal_p (*control_var, cand->var_after, 0)
5593 || operand_equal_p (*control_var, cand->var_before, 0)))
5594 elim_cost -= 1;
5596 express_cost = get_computation_cost (data, use, cand, false,
5597 &inv_vars_express, NULL,
5598 &inv_expr_express);
5599 if (cmp_iv != NULL)
5600 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5602 /* Count the cost of the original bound as well. */
5603 bound_cost = force_var_cost (data, *bound_cst, NULL);
5604 if (bound_cost.cost == 0)
5605 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5606 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5607 bound_cost.cost = 0;
5608 express_cost += bound_cost;
5610 /* Choose the better approach, preferring the eliminated IV. */
5611 if (elim_cost <= express_cost)
5613 cost = elim_cost;
5614 inv_vars = inv_vars_elim;
5615 inv_vars_elim = NULL;
5616 inv_expr = inv_expr_elim;
5617 /* For doloop candidate/use pair, adjust to zero cost. */
5618 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5619 cost = no_cost;
5621 else
5623 cost = express_cost;
5624 inv_vars = inv_vars_express;
5625 inv_vars_express = NULL;
5626 bound = NULL_TREE;
5627 comp = ERROR_MARK;
5628 inv_expr = inv_expr_express;
5631 if (inv_expr)
5633 inv_exprs = BITMAP_ALLOC (NULL);
5634 bitmap_set_bit (inv_exprs, inv_expr->id);
5636 set_group_iv_cost (data, group, cand, cost,
5637 inv_vars, bound, comp, inv_exprs);
5639 if (inv_vars_elim)
5640 BITMAP_FREE (inv_vars_elim);
5641 if (inv_vars_express)
5642 BITMAP_FREE (inv_vars_express);
5644 return !cost.infinite_cost_p ();
5647 /* Determines cost of computing uses in GROUP with CAND. Returns false
5648 if USE cannot be represented with CAND. */
5650 static bool
5651 determine_group_iv_cost (struct ivopts_data *data,
5652 struct iv_group *group, struct iv_cand *cand)
5654 switch (group->type)
5656 case USE_NONLINEAR_EXPR:
5657 return determine_group_iv_cost_generic (data, group, cand);
5659 case USE_REF_ADDRESS:
5660 case USE_PTR_ADDRESS:
5661 return determine_group_iv_cost_address (data, group, cand);
5663 case USE_COMPARE:
5664 return determine_group_iv_cost_cond (data, group, cand);
5666 default:
5667 gcc_unreachable ();
5671 /* Return true if get_computation_cost indicates that autoincrement is
5672 a possibility for the pair of USE and CAND, false otherwise. */
5674 static bool
5675 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5676 struct iv_cand *cand)
5678 if (!address_p (use->type))
5679 return false;
5681 bool can_autoinc = false;
5682 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5683 return can_autoinc;
5686 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5687 use that allows autoincrement, and set their AINC_USE if possible. */
5689 static void
5690 set_autoinc_for_original_candidates (struct ivopts_data *data)
5692 unsigned i, j;
5694 for (i = 0; i < data->vcands.length (); i++)
5696 struct iv_cand *cand = data->vcands[i];
5697 struct iv_use *closest_before = NULL;
5698 struct iv_use *closest_after = NULL;
5699 if (cand->pos != IP_ORIGINAL)
5700 continue;
5702 for (j = 0; j < data->vgroups.length (); j++)
5704 struct iv_group *group = data->vgroups[j];
5705 struct iv_use *use = group->vuses[0];
5706 unsigned uid = gimple_uid (use->stmt);
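/* Within a basic block the gimple uids reflect statement order, so comparing
   them against the uid of the increment statement finds the closest use
   before and after it.  */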
5708 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5709 continue;
5711 if (uid < gimple_uid (cand->incremented_at)
5712 && (closest_before == NULL
5713 || uid > gimple_uid (closest_before->stmt)))
5714 closest_before = use;
5716 if (uid > gimple_uid (cand->incremented_at)
5717 && (closest_after == NULL
5718 || uid < gimple_uid (closest_after->stmt)))
5719 closest_after = use;
5722 if (closest_before != NULL
5723 && autoinc_possible_for_pair (data, closest_before, cand))
5724 cand->ainc_use = closest_before;
5725 else if (closest_after != NULL
5726 && autoinc_possible_for_pair (data, closest_after, cand))
5727 cand->ainc_use = closest_after;
5731 /* Relate compare use with all candidates. */
5733 static void
5734 relate_compare_use_with_all_cands (struct ivopts_data *data)
5736 unsigned i, count = data->vcands.length ();
5737 for (i = 0; i < data->vgroups.length (); i++)
5739 struct iv_group *group = data->vgroups[i];
5741 if (group->type == USE_COMPARE)
5742 bitmap_set_range (group->related_cands, 0, count);
5746 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5747 doloop iv base from niter: base = niter + 1. */
5749 static tree
5750 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5751 const widest_int &iterations_max)
5753 tree ntype = TREE_TYPE (niter);
5754 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5755 if (!pref_type)
5756 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5757 build_int_cst (ntype, 1));
5759 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5761 int prec = TYPE_PRECISION (ntype);
5762 int pref_prec = TYPE_PRECISION (pref_type);
5764 tree base;
5766 /* Check if PREFERRED_MODE is able to represent niter. */
5767 if (pref_prec > prec
5768 || wi::ltu_p (iterations_max,
5769 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5770 UNSIGNED)))
5772 /* No wrap: it is safe to compute niter + 1 first and then convert to the preferred type. */
5773 if (wi::ltu_p (iterations_max,
5774 widest_int::from (wi::max_value (prec, UNSIGNED),
5775 UNSIGNED)))
5777 /* This could help to optimize the "-1 +1" pair when niter looks
5778 like "n - 1", with n in the original mode: "base = (n - 1) + 1"
5779 in PREFERRED_MODE could simply become base = (PREFERRED_TYPE) n. */
5780 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5781 build_int_cst (ntype, 1));
5782 base = fold_convert (pref_type, base);
5785 /* To avoid wrap, convert niter to the preferred type before adding 1. */
5786 else
5788 niter = fold_convert (pref_type, niter);
5789 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5790 build_int_cst (pref_type, 1));
5793 else
5794 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5795 build_int_cst (ntype, 1));
5796 return base;
5799 /* Add one doloop dedicated IV candidate:
5800 - Base is (may_be_zero ? 1 : (niter + 1)).
5801 - Step is -1. */
5803 static void
5804 add_iv_candidate_for_doloop (struct ivopts_data *data)
5806 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5807 gcc_assert (niter_desc && niter_desc->assumptions);
5809 tree niter = niter_desc->niter;
5810 tree ntype = TREE_TYPE (niter);
5811 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5813 tree may_be_zero = niter_desc->may_be_zero;
5814 if (may_be_zero && integer_zerop (may_be_zero))
5815 may_be_zero = NULL_TREE;
5816 if (may_be_zero)
5818 if (COMPARISON_CLASS_P (may_be_zero))
5820 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5821 build_int_cst (ntype, 0),
5822 rewrite_to_non_trapping_overflow (niter));
5824 /* Don't try to obtain the iteration count expression when may_be_zero is
5825 integer_nonzerop (the iteration count is then actually one) or anything else that is not a comparison. */
5826 else
5827 return;
5830 machine_mode mode = TYPE_MODE (ntype);
5831 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5833 tree base;
5834 if (mode != pref_mode)
5836 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5837 ntype = TREE_TYPE (base);
5839 else
5840 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5841 build_int_cst (ntype, 1));
5844 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
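/* The resulting candidate is a classic count-down counter: it starts at
   the number of times the body executes (niter + 1, or 1 when MAY_BE_ZERO
   holds) and is decremented by one per iteration, which is roughly the
   shape the later RTL doloop transformation expects.  */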
5847 /* Finds the candidates for the induction variables. */
5849 static void
5850 find_iv_candidates (struct ivopts_data *data)
5852 /* Add commonly used ivs. */
5853 add_standard_iv_candidates (data);
5855 /* Add doloop dedicated ivs. */
5856 if (data->doloop_use_p)
5857 add_iv_candidate_for_doloop (data);
5859 /* Add old induction variables. */
5860 add_iv_candidate_for_bivs (data);
5862 /* Add induction variables derived from uses. */
5863 add_iv_candidate_for_groups (data);
5865 set_autoinc_for_original_candidates (data);
5867 /* Record the important candidates. */
5868 record_important_candidates (data);
5870 /* Relate compare iv_use with all candidates. */
5871 if (!data->consider_all_candidates)
5872 relate_compare_use_with_all_cands (data);
5874 if (dump_file && (dump_flags & TDF_DETAILS))
5876 unsigned i;
5878 fprintf (dump_file, "\n<Important Candidates>:\t");
5879 for (i = 0; i < data->vcands.length (); i++)
5880 if (data->vcands[i]->important)
5881 fprintf (dump_file, " %d,", data->vcands[i]->id);
5882 fprintf (dump_file, "\n");
5884 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5885 for (i = 0; i < data->vgroups.length (); i++)
5887 struct iv_group *group = data->vgroups[i];
5889 if (group->related_cands)
5891 fprintf (dump_file, " Group %d:\t", group->id);
5892 dump_bitmap (dump_file, group->related_cands);
5895 fprintf (dump_file, "\n");
5899 /* Determines the cost of computing each group of uses with each iv candidate. */
5901 static void
5902 determine_group_iv_costs (struct ivopts_data *data)
5904 unsigned i, j;
5905 struct iv_cand *cand;
5906 struct iv_group *group;
5907 bitmap to_clear = BITMAP_ALLOC (NULL);
5909 alloc_use_cost_map (data);
5911 for (i = 0; i < data->vgroups.length (); i++)
5913 group = data->vgroups[i];
5915 if (data->consider_all_candidates)
5917 for (j = 0; j < data->vcands.length (); j++)
5919 cand = data->vcands[j];
5920 determine_group_iv_cost (data, group, cand);
5923 else
5925 bitmap_iterator bi;
5927 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5929 cand = data->vcands[j];
5930 if (!determine_group_iv_cost (data, group, cand))
5931 bitmap_set_bit (to_clear, j);
5934 /* Remove the candidates for which the cost is infinite from
5935 the list of related candidates. */
5936 bitmap_and_compl_into (group->related_cands, to_clear);
5937 bitmap_clear (to_clear);
5941 BITMAP_FREE (to_clear);
5943 if (dump_file && (dump_flags & TDF_DETAILS))
5945 bitmap_iterator bi;
5947 /* Dump invariant variables. */
5948 fprintf (dump_file, "\n<Invariant Vars>:\n");
5949 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5951 struct version_info *info = ver_info (data, i);
5952 if (info->inv_id)
5954 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5955 print_generic_expr (dump_file, info->name, TDF_SLIM);
5956 fprintf (dump_file, "%s\n",
5957 info->has_nonlin_use ? "" : "\t(eliminable)");
5961 /* Dump invariant expressions. */
5962 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5963 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5965 for (hash_table<iv_inv_expr_hasher>::iterator it
5966 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5967 ++it)
5968 list.safe_push (*it);
5970 list.qsort (sort_iv_inv_expr_ent);
5972 for (i = 0; i < list.length (); ++i)
5974 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5975 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5976 fprintf (dump_file, "\n");
5979 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5981 for (i = 0; i < data->vgroups.length (); i++)
5983 group = data->vgroups[i];
5985 fprintf (dump_file, "Group %d:\n", i);
5986 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5987 for (j = 0; j < group->n_map_members; j++)
5989 if (!group->cost_map[j].cand
5990 || group->cost_map[j].cost.infinite_cost_p ())
5991 continue;
5993 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5994 group->cost_map[j].cand->id,
5995 group->cost_map[j].cost.cost,
5996 group->cost_map[j].cost.complexity);
5997 if (!group->cost_map[j].inv_exprs
5998 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5999 fprintf (dump_file, "NIL;\t");
6000 else
6001 bitmap_print (dump_file,
6002 group->cost_map[j].inv_exprs, "", ";\t");
6003 if (!group->cost_map[j].inv_vars
6004 || bitmap_empty_p (group->cost_map[j].inv_vars))
6005 fprintf (dump_file, "NIL;\n");
6006 else
6007 bitmap_print (dump_file,
6008 group->cost_map[j].inv_vars, "", "\n");
6011 fprintf (dump_file, "\n");
6013 fprintf (dump_file, "\n");
6017 /* Determines cost of the candidate CAND. */
6019 static void
6020 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6022 comp_cost cost_base;
6023 int64_t cost, cost_step;
6024 tree base;
6026 gcc_assert (cand->iv != NULL);
6028 /* There are two costs associated with the candidate -- its increment
6029 and its initialization. The second is almost negligible for any loop
6030 that rolls enough, so we take it into account only a little. */
6032 base = cand->iv->base;
6033 cost_base = force_var_cost (data, base, NULL);
6034 /* It is rare for the iv register to happen to be initialized with the
6035 proper value at no cost. In general, there will at least be a regcopy
6036 or a const set. */
6037 if (cost_base.cost == 0)
6038 cost_base.cost = COSTS_N_INSNS (1);
6039 /* Doloop decrement should be considered as zero cost. */
6040 if (cand->doloop_p)
6041 cost_step = 0;
6042 else
6043 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6044 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6046 /* Prefer the original ivs unless we may gain something by replacing them.
6047 The reason is to make debugging simpler; this is not relevant for
6048 artificial ivs created by other optimization passes. */
6049 if ((cand->pos != IP_ORIGINAL
6050 || !SSA_NAME_VAR (cand->var_before)
6051 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6052 /* Prefer doloop as well. */
6053 && !cand->doloop_p)
6054 cost++;
6056 /* Prefer not to insert statements into latch unless there are some
6057 already (so that we do not create unnecessary jumps). */
6058 if (cand->pos == IP_END
6059 && empty_block_p (ip_end_pos (data->current_loop)))
6060 cost++;
6062 cand->cost = cost;
6063 cand->cost_step = cost_step;
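/* Rough illustration (all costs are target-specific): a candidate whose
   base needs one instruction to set up and whose step is a plain add ends
   up with cost_step = add_cost plus a setup component, which
   adjust_setup_cost scales down (roughly by the expected iteration count),
   plus the small "+ 1" penalties above for non-original, non-doloop
   candidates and for having to insert into an empty latch.  */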
6066 /* Determines costs of computation of the candidates. */
6068 static void
6069 determine_iv_costs (struct ivopts_data *data)
6071 unsigned i;
6073 if (dump_file && (dump_flags & TDF_DETAILS))
6075 fprintf (dump_file, "<Candidate Costs>:\n");
6076 fprintf (dump_file, " cand\tcost\n");
6079 for (i = 0; i < data->vcands.length (); i++)
6081 struct iv_cand *cand = data->vcands[i];
6083 determine_iv_cost (data, cand);
6085 if (dump_file && (dump_flags & TDF_DETAILS))
6086 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6089 if (dump_file && (dump_flags & TDF_DETAILS))
6090 fprintf (dump_file, "\n");
6093 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6094 induction variables. Note N_INVS includes both invariant variables and
6095 invariant expressions. */
6097 static unsigned
6098 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6099 unsigned n_cands)
6101 unsigned cost;
6102 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6103 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6104 bool speed = data->speed;
6106 /* If there is a call in the loop body, the call-clobbered registers
6107 are not available for loop invariants. */
6108 if (data->body_includes_call)
6109 available_regs = available_regs - target_clobbered_regs;
6111 /* If we have enough registers. */
6112 if (regs_needed + target_res_regs < available_regs)
6113 cost = n_new;
6114 /* If close to running out of registers, try to preserve them. */
6115 else if (regs_needed <= available_regs)
6116 cost = target_reg_cost [speed] * regs_needed;
6117 /* If the total need exceeds the available registers but the candidates
6118 alone still fit, we penalize the extra registers using target_spill_cost. */
6119 else if (n_cands <= available_regs)
6120 cost = target_reg_cost [speed] * available_regs
6121 + target_spill_cost [speed] * (regs_needed - available_regs);
6122 /* If even the number of candidates exceeds the available registers, we
6123 penalize the extra candidate registers using target_spill_cost * 2,
6124 because spilling an induction variable is more expensive than spilling an invariant. */
6125 else
6126 cost = target_reg_cost [speed] * available_regs
6127 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6128 + target_spill_cost [speed] * (regs_needed - n_cands);
6130 /* Finally, add the number of candidates, so that we prefer eliminating
6131 induction variables if possible. */
6132 return cost + n_cands;
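/* Worked example with made-up numbers: say available_regs ends up as 12,
   data->regs_used is 6, and we are costing n_cands = 4 candidates plus
   n_invs = 3 invariants, so regs_needed = 13.  The first two branches
   fail (13 > 12), but n_cands = 4 <= 12, so the cost is 12 *
   target_reg_cost plus one spilled register at target_spill_cost, and the
   final "+ n_cands" adds 4 to bias the search towards fewer induction
   variables.  */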
6135 /* For each size of the induction variable set determine the penalty. */
6137 static void
6138 determine_set_costs (struct ivopts_data *data)
6140 unsigned j, n;
6141 gphi *phi;
6142 gphi_iterator psi;
6143 tree op;
6144 class loop *loop = data->current_loop;
6145 bitmap_iterator bi;
6147 if (dump_file && (dump_flags & TDF_DETAILS))
6149 fprintf (dump_file, "<Global Costs>:\n");
6150 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6151 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6152 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6153 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6156 n = 0;
6157 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6159 phi = psi.phi ();
6160 op = PHI_RESULT (phi);
6162 if (virtual_operand_p (op))
6163 continue;
6165 if (get_iv (data, op))
6166 continue;
6168 if (!POINTER_TYPE_P (TREE_TYPE (op))
6169 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6170 continue;
6172 n++;
6175 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6177 struct version_info *info = ver_info (data, j);
6179 if (info->inv_id && info->has_nonlin_use)
6180 n++;
6183 data->regs_used = n;
6184 if (dump_file && (dump_flags & TDF_DETAILS))
6185 fprintf (dump_file, " regs_used %d\n", n);
6187 if (dump_file && (dump_flags & TDF_DETAILS))
6189 fprintf (dump_file, " cost for size:\n");
6190 fprintf (dump_file, " ivs\tcost\n");
6191 for (j = 0; j <= 2 * target_avail_regs; j++)
6192 fprintf (dump_file, " %d\t%d\n", j,
6193 ivopts_estimate_reg_pressure (data, 0, j));
6194 fprintf (dump_file, "\n");
6198 /* Returns true if A is a cheaper cost pair than B. */
6200 static bool
6201 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6203 if (!a)
6204 return false;
6206 if (!b)
6207 return true;
6209 if (a->cost < b->cost)
6210 return true;
6212 if (b->cost < a->cost)
6213 return false;
6215 /* In case the costs are the same, prefer the cheaper candidate. */
6216 if (a->cand->cost < b->cand->cost)
6217 return true;
6219 return false;
6222 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6223 for more expensive, equal and cheaper respectively. */
6225 static int
6226 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6228 if (cheaper_cost_pair (a, b))
6229 return -1;
6230 if (cheaper_cost_pair (b, a))
6231 return 1;
6233 return 0;
6236 /* Returns the cost pair through which GROUP is expressed in IVS. */
6238 static class cost_pair *
6239 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6241 return ivs->cand_for_group[group->id];
6244 /* Computes the cost field of IVS structure. */
6246 static void
6247 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6249 comp_cost cost = ivs->cand_use_cost;
6251 cost += ivs->cand_cost;
6252 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6253 ivs->cost = cost;
6256 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6257 and IVS. */
6259 static void
6260 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6262 bitmap_iterator bi;
6263 unsigned iid;
6265 if (!invs)
6266 return;
6268 gcc_assert (n_inv_uses != NULL);
6269 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6271 n_inv_uses[iid]--;
6272 if (n_inv_uses[iid] == 0)
6273 ivs->n_invs--;
6277 /* Set GROUP not to be expressed by any candidate in IVS. */
6279 static void
6280 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6281 struct iv_group *group)
6283 unsigned gid = group->id, cid;
6284 class cost_pair *cp;
6286 cp = ivs->cand_for_group[gid];
6287 if (!cp)
6288 return;
6289 cid = cp->cand->id;
6291 ivs->bad_groups++;
6292 ivs->cand_for_group[gid] = NULL;
6293 ivs->n_cand_uses[cid]--;
6295 if (ivs->n_cand_uses[cid] == 0)
6297 bitmap_clear_bit (ivs->cands, cid);
6298 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6299 ivs->n_cands--;
6300 ivs->cand_cost -= cp->cand->cost;
6301 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6302 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6305 ivs->cand_use_cost -= cp->cost;
6306 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6307 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6308 iv_ca_recount_cost (data, ivs);
6311 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6312 IVS. */
6314 static void
6315 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6317 bitmap_iterator bi;
6318 unsigned iid;
6320 if (!invs)
6321 return;
6323 gcc_assert (n_inv_uses != NULL);
6324 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6326 n_inv_uses[iid]++;
6327 if (n_inv_uses[iid] == 1)
6328 ivs->n_invs++;
6332 /* Set cost pair for GROUP in set IVS to CP. */
6334 static void
6335 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6336 struct iv_group *group, class cost_pair *cp)
6338 unsigned gid = group->id, cid;
6340 if (ivs->cand_for_group[gid] == cp)
6341 return;
6343 if (ivs->cand_for_group[gid])
6344 iv_ca_set_no_cp (data, ivs, group);
6346 if (cp)
6348 cid = cp->cand->id;
6350 ivs->bad_groups--;
6351 ivs->cand_for_group[gid] = cp;
6352 ivs->n_cand_uses[cid]++;
6353 if (ivs->n_cand_uses[cid] == 1)
6355 bitmap_set_bit (ivs->cands, cid);
6356 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6357 ivs->n_cands++;
6358 ivs->cand_cost += cp->cand->cost;
6359 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6360 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6363 ivs->cand_use_cost += cp->cost;
6364 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6365 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6366 iv_ca_recount_cost (data, ivs);
6370 /* Extend set IVS by expressing GROUP by some of the candidates in it
6371 if possible. Consider all important candidates if the candidates in
6372 set IVS don't give any result. */
6374 static void
6375 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6376 struct iv_group *group)
6378 class cost_pair *best_cp = NULL, *cp;
6379 bitmap_iterator bi;
6380 unsigned i;
6381 struct iv_cand *cand;
6383 gcc_assert (ivs->upto >= group->id);
6384 ivs->upto++;
6385 ivs->bad_groups++;
6387 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6389 cand = data->vcands[i];
6390 cp = get_group_iv_cost (data, group, cand);
6391 if (cheaper_cost_pair (cp, best_cp))
6392 best_cp = cp;
6395 if (best_cp == NULL)
6397 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6399 cand = data->vcands[i];
6400 cp = get_group_iv_cost (data, group, cand);
6401 if (cheaper_cost_pair (cp, best_cp))
6402 best_cp = cp;
6406 iv_ca_set_cp (data, ivs, group, best_cp);
6409 /* Get cost for assignment IVS. */
6411 static comp_cost
6412 iv_ca_cost (class iv_ca *ivs)
6414 /* This was a conditional expression but it triggered a bug in
6415 Sun C 5.5. */
6416 if (ivs->bad_groups)
6417 return infinite_cost;
6418 else
6419 return ivs->cost;
6422 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6423 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6424 respectively. */
6426 static int
6427 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6428 struct iv_group *group, class cost_pair *old_cp,
6429 class cost_pair *new_cp)
6431 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6432 unsigned old_n_invs = ivs->n_invs;
6433 iv_ca_set_cp (data, ivs, group, new_cp);
6434 unsigned new_n_invs = ivs->n_invs;
6435 iv_ca_set_cp (data, ivs, group, old_cp);
6437 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6440 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6441 it before NEXT. */
6443 static struct iv_ca_delta *
6444 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6445 class cost_pair *new_cp, struct iv_ca_delta *next)
6447 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6449 change->group = group;
6450 change->old_cp = old_cp;
6451 change->new_cp = new_cp;
6452 change->next = next;
6454 return change;
6457 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6458 are rewritten. */
6460 static struct iv_ca_delta *
6461 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6463 struct iv_ca_delta *last;
6465 if (!l2)
6466 return l1;
6468 if (!l1)
6469 return l2;
6471 for (last = l1; last->next; last = last->next)
6472 continue;
6473 last->next = l2;
6475 return l1;
6478 /* Reverse the list of changes DELTA, forming the inverse to it. */
6480 static struct iv_ca_delta *
6481 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6483 struct iv_ca_delta *act, *next, *prev = NULL;
6485 for (act = delta; act; act = next)
6487 next = act->next;
6488 act->next = prev;
6489 prev = act;
6491 std::swap (act->old_cp, act->new_cp);
6494 return prev;
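/* E.g. a delta chain A -> B -> C becomes C -> B -> A, with each entry's
   old_cp and new_cp swapped, so committing the reversed chain exactly
   undoes the original changes.  */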
6497 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6498 reverted instead. */
6500 static void
6501 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6502 struct iv_ca_delta *delta, bool forward)
6504 class cost_pair *from, *to;
6505 struct iv_ca_delta *act;
6507 if (!forward)
6508 delta = iv_ca_delta_reverse (delta);
6510 for (act = delta; act; act = act->next)
6512 from = act->old_cp;
6513 to = act->new_cp;
6514 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6515 iv_ca_set_cp (data, ivs, act->group, to);
6518 if (!forward)
6519 iv_ca_delta_reverse (delta);
6522 /* Returns true if CAND is used in IVS. */
6524 static bool
6525 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6527 return ivs->n_cand_uses[cand->id] > 0;
6530 /* Returns number of induction variable candidates in the set IVS. */
6532 static unsigned
6533 iv_ca_n_cands (class iv_ca *ivs)
6535 return ivs->n_cands;
6538 /* Free the list of changes DELTA. */
6540 static void
6541 iv_ca_delta_free (struct iv_ca_delta **delta)
6543 struct iv_ca_delta *act, *next;
6545 for (act = *delta; act; act = next)
6547 next = act->next;
6548 free (act);
6551 *delta = NULL;
6554 /* Allocates a new iv candidate assignment. */
6556 static class iv_ca *
6557 iv_ca_new (struct ivopts_data *data)
6559 class iv_ca *nw = XNEW (class iv_ca);
6561 nw->upto = 0;
6562 nw->bad_groups = 0;
6563 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6564 data->vgroups.length ());
6565 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6566 nw->cands = BITMAP_ALLOC (NULL);
6567 nw->n_cands = 0;
6568 nw->n_invs = 0;
6569 nw->cand_use_cost = no_cost;
6570 nw->cand_cost = 0;
6571 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6572 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6573 nw->cost = no_cost;
6575 return nw;
6578 /* Free memory occupied by the set IVS. */
6580 static void
6581 iv_ca_free (class iv_ca **ivs)
6583 free ((*ivs)->cand_for_group);
6584 free ((*ivs)->n_cand_uses);
6585 BITMAP_FREE ((*ivs)->cands);
6586 free ((*ivs)->n_inv_var_uses);
6587 free ((*ivs)->n_inv_expr_uses);
6588 free (*ivs);
6589 *ivs = NULL;
6592 /* Dumps IVS to FILE. */
6594 static void
6595 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6597 unsigned i;
6598 comp_cost cost = iv_ca_cost (ivs);
6600 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6601 cost.complexity);
6602 fprintf (file, " reg_cost: %d\n",
6603 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6604 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6605 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6606 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6607 bitmap_print (file, ivs->cands, " candidates: ","\n");
6609 for (i = 0; i < ivs->upto; i++)
6611 struct iv_group *group = data->vgroups[i];
6612 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6613 if (cp)
6614 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6615 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6616 cp->cost.cost, cp->cost.complexity);
6617 else
6618 fprintf (file, " group:%d --> ??\n", group->id);
6621 const char *pref = "";
6622 fprintf (file, " invariant variables: ");
6623 for (i = 1; i <= data->max_inv_var_id; i++)
6624 if (ivs->n_inv_var_uses[i])
6626 fprintf (file, "%s%d", pref, i);
6627 pref = ", ";
6630 pref = "";
6631 fprintf (file, "\n invariant expressions: ");
6632 for (i = 1; i <= data->max_inv_expr_id; i++)
6633 if (ivs->n_inv_expr_uses[i])
6635 fprintf (file, "%s%d", pref, i);
6636 pref = ", ";
6639 fprintf (file, "\n\n");
6642 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6643 new set, and store differences in DELTA. Number of induction variables
6644 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true
6645 the function will try to find a solution with a minimal number of iv candidates. */
6647 static comp_cost
6648 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6649 struct iv_cand *cand, struct iv_ca_delta **delta,
6650 unsigned *n_ivs, bool min_ncand)
6652 unsigned i;
6653 comp_cost cost;
6654 struct iv_group *group;
6655 class cost_pair *old_cp, *new_cp;
6657 *delta = NULL;
6658 for (i = 0; i < ivs->upto; i++)
6660 group = data->vgroups[i];
6661 old_cp = iv_ca_cand_for_group (ivs, group);
6663 if (old_cp
6664 && old_cp->cand == cand)
6665 continue;
6667 new_cp = get_group_iv_cost (data, group, cand);
6668 if (!new_cp)
6669 continue;
6671 if (!min_ncand)
6673 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6674 /* Skip if new_cp depends on more invariants. */
6675 if (cmp_invs > 0)
6676 continue;
6678 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6679 /* Skip if new_cp is not cheaper. */
6680 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6681 continue;
6684 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6687 iv_ca_delta_commit (data, ivs, *delta, true);
6688 cost = iv_ca_cost (ivs);
6689 if (n_ivs)
6690 *n_ivs = iv_ca_n_cands (ivs);
6691 iv_ca_delta_commit (data, ivs, *delta, false);
6693 return cost;
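/* Note the delta is committed only temporarily to measure the cost of the
   extended set and is immediately reverted; the caller decides whether to
   apply it for real via iv_ca_delta_commit.  */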
6696 /* Try narrowing set IVS by removing CAND. Return the cost of
6697 the new set and store the differences in DELTA. START is
6698 the candidate with which we start narrowing. */
6700 static comp_cost
6701 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6702 struct iv_cand *cand, struct iv_cand *start,
6703 struct iv_ca_delta **delta)
6705 unsigned i, ci;
6706 struct iv_group *group;
6707 class cost_pair *old_cp, *new_cp, *cp;
6708 bitmap_iterator bi;
6709 struct iv_cand *cnd;
6710 comp_cost cost, best_cost, acost;
6712 *delta = NULL;
6713 for (i = 0; i < data->vgroups.length (); i++)
6715 group = data->vgroups[i];
6717 old_cp = iv_ca_cand_for_group (ivs, group);
6718 if (old_cp->cand != cand)
6719 continue;
6721 best_cost = iv_ca_cost (ivs);
6722 /* Start narrowing with START. */
6723 new_cp = get_group_iv_cost (data, group, start);
6725 if (data->consider_all_candidates)
6727 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6729 if (ci == cand->id || (start && ci == start->id))
6730 continue;
6732 cnd = data->vcands[ci];
6734 cp = get_group_iv_cost (data, group, cnd);
6735 if (!cp)
6736 continue;
6738 iv_ca_set_cp (data, ivs, group, cp);
6739 acost = iv_ca_cost (ivs);
6741 if (acost < best_cost)
6743 best_cost = acost;
6744 new_cp = cp;
6748 else
6750 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6752 if (ci == cand->id || (start && ci == start->id))
6753 continue;
6755 cnd = data->vcands[ci];
6757 cp = get_group_iv_cost (data, group, cnd);
6758 if (!cp)
6759 continue;
6761 iv_ca_set_cp (data, ivs, group, cp);
6762 acost = iv_ca_cost (ivs);
6764 if (acost < best_cost)
6766 best_cost = acost;
6767 new_cp = cp;
6771 /* Restore to old cp for use. */
6772 iv_ca_set_cp (data, ivs, group, old_cp);
6774 if (!new_cp)
6776 iv_ca_delta_free (delta);
6777 return infinite_cost;
6780 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6783 iv_ca_delta_commit (data, ivs, *delta, true);
6784 cost = iv_ca_cost (ivs);
6785 iv_ca_delta_commit (data, ivs, *delta, false);
6787 return cost;
6790 /* Try optimizing the set of candidates IVS by removing candidates other
6791 than EXCEPT_CAND from it. Return the cost of the new set, and store
6792 the differences in DELTA. */
6794 static comp_cost
6795 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6796 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6798 bitmap_iterator bi;
6799 struct iv_ca_delta *act_delta, *best_delta;
6800 unsigned i;
6801 comp_cost best_cost, acost;
6802 struct iv_cand *cand;
6804 best_delta = NULL;
6805 best_cost = iv_ca_cost (ivs);
6807 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6809 cand = data->vcands[i];
6811 if (cand == except_cand)
6812 continue;
6814 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6816 if (acost < best_cost)
6818 best_cost = acost;
6819 iv_ca_delta_free (&best_delta);
6820 best_delta = act_delta;
6822 else
6823 iv_ca_delta_free (&act_delta);
6826 if (!best_delta)
6828 *delta = NULL;
6829 return best_cost;
6832 /* Recurse to possibly remove other unnecessary ivs. */
6833 iv_ca_delta_commit (data, ivs, best_delta, true);
6834 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6835 iv_ca_delta_commit (data, ivs, best_delta, false);
6836 *delta = iv_ca_delta_join (best_delta, *delta);
6837 return best_cost;
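/* The recursion above keeps removing candidates that become unnecessary
   once the best removal has been applied; the per-step deltas are joined
   so the caller can commit or discard the whole sequence at once.  */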
6840 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6841 cheaper local cost for GROUP than BEST_CP. Return pointer to
6842 the corresponding cost_pair, otherwise just return BEST_CP. */
6844 static class cost_pair*
6845 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6846 unsigned int cand_idx, struct iv_cand *old_cand,
6847 class cost_pair *best_cp)
6849 struct iv_cand *cand;
6850 class cost_pair *cp;
6852 gcc_assert (old_cand != NULL && best_cp != NULL);
6853 if (cand_idx == old_cand->id)
6854 return best_cp;
6856 cand = data->vcands[cand_idx];
6857 cp = get_group_iv_cost (data, group, cand);
6858 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6859 return cp;
6861 return best_cp;
6864 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6865 which are used by more than one iv use. For each such candidate,
6866 this function tries to represent the iv uses under that candidate using
6867 other candidates with lower local cost, then tries to prune the new set.
6868 If the new set has a lower cost, it returns the new cost after recording
6869 the candidate replacement in list DELTA. */
6871 static comp_cost
6872 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6873 struct iv_ca_delta **delta)
6875 bitmap_iterator bi, bj;
6876 unsigned int i, j, k;
6877 struct iv_cand *cand;
6878 comp_cost orig_cost, acost;
6879 struct iv_ca_delta *act_delta, *tmp_delta;
6880 class cost_pair *old_cp, *best_cp = NULL;
6882 *delta = NULL;
6883 orig_cost = iv_ca_cost (ivs);
6885 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6887 if (ivs->n_cand_uses[i] == 1
6888 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6889 continue;
6891 cand = data->vcands[i];
6893 act_delta = NULL;
6894 /* Represent uses under current candidate using other ones with
6895 lower local cost. */
6896 for (j = 0; j < ivs->upto; j++)
6898 struct iv_group *group = data->vgroups[j];
6899 old_cp = iv_ca_cand_for_group (ivs, group);
6901 if (old_cp->cand != cand)
6902 continue;
6904 best_cp = old_cp;
6905 if (data->consider_all_candidates)
6906 for (k = 0; k < data->vcands.length (); k++)
6907 best_cp = cheaper_cost_with_cand (data, group, k,
6908 old_cp->cand, best_cp);
6909 else
6910 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6911 best_cp = cheaper_cost_with_cand (data, group, k,
6912 old_cp->cand, best_cp);
6914 if (best_cp == old_cp)
6915 continue;
6917 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6919 /* No need for further pruning. */
6920 if (!act_delta)
6921 continue;
6923 /* Prune the new candidate set. */
6924 iv_ca_delta_commit (data, ivs, act_delta, true);
6925 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6926 iv_ca_delta_commit (data, ivs, act_delta, false);
6927 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6929 if (acost < orig_cost)
6931 *delta = act_delta;
6932 return acost;
6934 else
6935 iv_ca_delta_free (&act_delta);
6938 return orig_cost;
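/* Note the first replacement that beats ORIG_COST is returned right away
   rather than continuing to search for the globally best one, which keeps
   this fixed-point-breaking step relatively cheap.  */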
6941 /* Tries to extend the set IVS in the best possible way in order to
6942 express GROUP. If ORIGINALP is true, prefer candidates from
6943 the original set of IVs, otherwise favor important candidates not
6944 based on any memory object. */
6946 static bool
6947 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6948 struct iv_group *group, bool originalp)
6950 comp_cost best_cost, act_cost;
6951 unsigned i;
6952 bitmap_iterator bi;
6953 struct iv_cand *cand;
6954 struct iv_ca_delta *best_delta = NULL, *act_delta;
6955 class cost_pair *cp;
6957 iv_ca_add_group (data, ivs, group);
6958 best_cost = iv_ca_cost (ivs);
6959 cp = iv_ca_cand_for_group (ivs, group);
6960 if (cp)
6962 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6963 iv_ca_set_no_cp (data, ivs, group);
6966 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6967 first try important candidates not based on any memory object. Only if
6968 this fails, try the specific ones. Rationale -- in loops with many
6969 variables the best choice often is to use just one generic biv. If we
6970 added here many ivs specific to the uses, the optimization algorithm later
6971 would be likely to get stuck in a local minimum, thus causing us to create
6972 too many ivs. The approach from few ivs to more seems more likely to be
6973 successful -- starting from few ivs, replacing an expensive use by a
6974 specific iv should always be a win. */
6975 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6977 cand = data->vcands[i];
6979 if (originalp && cand->pos != IP_ORIGINAL)
6980 continue;
6982 if (!originalp && cand->iv->base_object != NULL_TREE)
6983 continue;
6985 if (iv_ca_cand_used_p (ivs, cand))
6986 continue;
6988 cp = get_group_iv_cost (data, group, cand);
6989 if (!cp)
6990 continue;
6992 iv_ca_set_cp (data, ivs, group, cp);
6993 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6994 true);
6995 iv_ca_set_no_cp (data, ivs, group);
6996 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6998 if (act_cost < best_cost)
7000 best_cost = act_cost;
7002 iv_ca_delta_free (&best_delta);
7003 best_delta = act_delta;
7005 else
7006 iv_ca_delta_free (&act_delta);
7009 if (best_cost.infinite_cost_p ())
7011 for (i = 0; i < group->n_map_members; i++)
7013 cp = group->cost_map + i;
7014 cand = cp->cand;
7015 if (!cand)
7016 continue;
7018 /* Already tried this. */
7019 if (cand->important)
7021 if (originalp && cand->pos == IP_ORIGINAL)
7022 continue;
7023 if (!originalp && cand->iv->base_object == NULL_TREE)
7024 continue;
7027 if (iv_ca_cand_used_p (ivs, cand))
7028 continue;
7030 act_delta = NULL;
7031 iv_ca_set_cp (data, ivs, group, cp);
7032 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7033 iv_ca_set_no_cp (data, ivs, group);
7034 act_delta = iv_ca_delta_add (group,
7035 iv_ca_cand_for_group (ivs, group),
7036 cp, act_delta);
7038 if (act_cost < best_cost)
7040 best_cost = act_cost;
7042 if (best_delta)
7043 iv_ca_delta_free (&best_delta);
7044 best_delta = act_delta;
7046 else
7047 iv_ca_delta_free (&act_delta);
7051 iv_ca_delta_commit (data, ivs, best_delta, true);
7052 iv_ca_delta_free (&best_delta);
7054 return !best_cost.infinite_cost_p ();
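/* The search above is two-phase: candidates recorded in the group's
   related_cands bitmap are tried first, and only if all of them yield
   infinite cost do we fall back to scanning the group's full cost map.  */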
7057 /* Finds an initial assignment of candidates to uses. */
7059 static class iv_ca *
7060 get_initial_solution (struct ivopts_data *data, bool originalp)
7062 unsigned i;
7063 class iv_ca *ivs = iv_ca_new (data);
7065 for (i = 0; i < data->vgroups.length (); i++)
7066 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7068 iv_ca_free (&ivs);
7069 return NULL;
7072 return ivs;
7075 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7076 points to a bool variable; if it is true, this function tries to break
7077 the locally optimal fixed point by replacing candidates in IVS. */
7079 static bool
7080 try_improve_iv_set (struct ivopts_data *data,
7081 class iv_ca *ivs, bool *try_replace_p)
7083 unsigned i, n_ivs;
7084 comp_cost acost, best_cost = iv_ca_cost (ivs);
7085 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7086 struct iv_cand *cand;
7088 /* Try extending the set of induction variables by one. */
7089 for (i = 0; i < data->vcands.length (); i++)
7091 cand = data->vcands[i];
7093 if (iv_ca_cand_used_p (ivs, cand))
7094 continue;
7096 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7097 if (!act_delta)
7098 continue;
7100 /* If we successfully added the candidate and the set is small enough,
7101 try optimizing it by removing other candidates. */
7102 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7104 iv_ca_delta_commit (data, ivs, act_delta, true);
7105 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7106 iv_ca_delta_commit (data, ivs, act_delta, false);
7107 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7110 if (acost < best_cost)
7112 best_cost = acost;
7113 iv_ca_delta_free (&best_delta);
7114 best_delta = act_delta;
7116 else
7117 iv_ca_delta_free (&act_delta);
7120 if (!best_delta)
7122 /* Try removing the candidates from the set instead. */
7123 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7125 if (!best_delta && *try_replace_p)
7127 *try_replace_p = false;
7128 /* So far the candidate selection algorithm tends to choose fewer IVs
7129 so that it can handle cases in which loops have many variables
7130 but the best choice is often to use only one general biv. One
7131 weakness is that it can't handle the opposite cases, in which different
7132 candidates should be chosen with respect to each use. To solve
7133 the problem, we replace candidates in the manner described in the
7134 comments of iv_ca_replace, thus giving the general algorithm a chance
7135 to break the locally optimal fixed point in these cases. */
7136 best_cost = iv_ca_replace (data, ivs, &best_delta);
7139 if (!best_delta)
7140 return false;
7143 iv_ca_delta_commit (data, ivs, best_delta, true);
7144 iv_ca_delta_free (&best_delta);
7145 return best_cost == iv_ca_cost (ivs);
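/* The return value is true only when the committed delta actually realizes
   the predicted best cost; find_optimal_iv_set_1 below keeps calling this
   function in a loop until it returns false.  */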
7148 /* Attempts to find the optimal set of induction variables. We use a simple
7149 greedy heuristic -- we try to replace at most one candidate in the selected
7150 solution and remove the unused ivs while this improves the cost. */
7152 static class iv_ca *
7153 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7155 class iv_ca *set;
7156 bool try_replace_p = true;
7158 /* Get the initial solution. */
7159 set = get_initial_solution (data, originalp);
7160 if (!set)
7162 if (dump_file && (dump_flags & TDF_DETAILS))
7163 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7164 return NULL;
7167 if (dump_file && (dump_flags & TDF_DETAILS))
7169 fprintf (dump_file, "Initial set of candidates:\n");
7170 iv_ca_dump (data, dump_file, set);
7173 while (try_improve_iv_set (data, set, &try_replace_p))
7175 if (dump_file && (dump_flags & TDF_DETAILS))
7177 fprintf (dump_file, "Improved to:\n");
7178 iv_ca_dump (data, dump_file, set);
7182 /* If the set has infinite_cost, it can't be optimal. */
7183 if (iv_ca_cost (set).infinite_cost_p ())
7185 if (dump_file && (dump_flags & TDF_DETAILS))
7186 fprintf (dump_file,
7187 "Overflow to infinite cost in try_improve_iv_set.\n");
7188 iv_ca_free (&set);
7190 return set;
7193 static class iv_ca *
7194 find_optimal_iv_set (struct ivopts_data *data)
7196 unsigned i;
7197 comp_cost cost, origcost;
7198 class iv_ca *set, *origset;
7200 /* Determine the cost based on a strategy that starts with the original IVs,
7201 and try again using a strategy that prefers important candidates not
7202 based on any memory object. */
7203 origset = find_optimal_iv_set_1 (data, true);
7204 set = find_optimal_iv_set_1 (data, false);
7206 if (!origset && !set)
7207 return NULL;
7209 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7210 cost = set ? iv_ca_cost (set) : infinite_cost;
7212 if (dump_file && (dump_flags & TDF_DETAILS))
7214 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7215 origcost.cost, origcost.complexity);
7216 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7217 cost.cost, cost.complexity);
7220 /* Choose the one with the best cost. */
7221 if (origcost <= cost)
7223 if (set)
7224 iv_ca_free (&set);
7225 set = origset;
7227 else if (origset)
7228 iv_ca_free (&origset);
7230 for (i = 0; i < data->vgroups.length (); i++)
7232 struct iv_group *group = data->vgroups[i];
7233 group->selected = iv_ca_cand_for_group (set, group)->cand;
7236 return set;
7239 /* Creates a new induction variable corresponding to CAND. */
7241 static void
7242 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7244 gimple_stmt_iterator incr_pos;
7245 tree base;
7246 struct iv_use *use;
7247 struct iv_group *group;
7248 bool after = false;
7250 gcc_assert (cand->iv != NULL);
7252 switch (cand->pos)
7254 case IP_NORMAL:
7255 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7256 break;
7258 case IP_END:
7259 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7260 after = true;
7261 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7263 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7264 incr_pos = gsi_after_labels (split_edge (e));
7265 after = false;
7267 break;
7269 case IP_AFTER_USE:
7270 after = true;
7271 /* fall through */
7272 case IP_BEFORE_USE:
7273 incr_pos = gsi_for_stmt (cand->incremented_at);
7274 break;
7276 case IP_ORIGINAL:
7277 /* Mark that the iv is preserved. */
7278 name_info (data, cand->var_before)->preserve_biv = true;
7279 name_info (data, cand->var_after)->preserve_biv = true;
7281 /* Rewrite the increment so that it uses var_before directly. */
7282 use = find_interesting_uses_op (data, cand->var_after);
7283 group = data->vgroups[use->group_id];
7284 group->selected = cand;
7285 return;
7288 gimple_add_tmp_var (cand->var_before);
7290 base = unshare_expr (cand->iv->base);
7292 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7293 cand->var_before, data->current_loop,
7294 &incr_pos, after, &cand->var_before, &cand->var_after);
7297 /* Creates new induction variables described in SET. */
7299 static void
7300 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7302 unsigned i;
7303 struct iv_cand *cand;
7304 bitmap_iterator bi;
7306 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7308 cand = data->vcands[i];
7309 create_new_iv (data, cand);
7312 if (dump_file && (dump_flags & TDF_DETAILS))
7314 fprintf (dump_file, "Selected IV set for loop %d",
7315 data->current_loop->num);
7316 if (data->loop_loc != UNKNOWN_LOCATION)
7317 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7318 LOCATION_LINE (data->loop_loc));
7319 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7320 avg_loop_niter (data->current_loop));
7321 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7322 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7324 cand = data->vcands[i];
7325 dump_cand (dump_file, cand);
7327 fprintf (dump_file, "\n");
7331 /* Rewrites USE (definition of iv used in a nonlinear expression)
7332 using candidate CAND. */
7334 static void
7335 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7336 struct iv_use *use, struct iv_cand *cand)
7338 gassign *ass;
7339 gimple_stmt_iterator bsi;
7340 tree comp, type = get_use_type (use), tgt;
7342 /* An important special case -- if we are asked to express value of
7343 the original iv by itself, just exit; there is no need to
7344 introduce a new computation (that might also need casting the
7345 variable to unsigned and back). */
7346 if (cand->pos == IP_ORIGINAL
7347 && cand->incremented_at == use->stmt)
7349 tree op = NULL_TREE;
7350 enum tree_code stmt_code;
7352 gcc_assert (is_gimple_assign (use->stmt));
7353 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7355 /* Check whether we may leave the computation unchanged.
7356 This is the case only if it does not rely on other
7357 computations in the loop -- otherwise, the computation
7358 we rely upon may be removed in remove_unused_ivs,
7359 thus leading to an ICE. */
7360 stmt_code = gimple_assign_rhs_code (use->stmt);
7361 if (stmt_code == PLUS_EXPR
7362 || stmt_code == MINUS_EXPR
7363 || stmt_code == POINTER_PLUS_EXPR)
7365 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7366 op = gimple_assign_rhs2 (use->stmt);
7367 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7368 op = gimple_assign_rhs1 (use->stmt);
7371 if (op != NULL_TREE)
7373 if (expr_invariant_in_loop_p (data->current_loop, op))
7374 return;
7375 if (TREE_CODE (op) == SSA_NAME)
7377 struct iv *iv = get_iv (data, op);
7378 if (iv != NULL && integer_zerop (iv->step))
7379 return;
7384 switch (gimple_code (use->stmt))
7386 case GIMPLE_PHI:
7387 tgt = PHI_RESULT (use->stmt);
7389 /* If we should keep the biv, do not replace it. */
7390 if (name_info (data, tgt)->preserve_biv)
7391 return;
7393 bsi = gsi_after_labels (gimple_bb (use->stmt));
7394 break;
7396 case GIMPLE_ASSIGN:
7397 tgt = gimple_assign_lhs (use->stmt);
7398 bsi = gsi_for_stmt (use->stmt);
7399 break;
7401 default:
7402 gcc_unreachable ();
7405 aff_tree aff_inv, aff_var;
7406 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7407 use, cand, &aff_inv, &aff_var))
7408 gcc_unreachable ();
7410 unshare_aff_combination (&aff_inv);
7411 unshare_aff_combination (&aff_var);
7412 /* Prefer CSE opportunities over hoisted loop invariants by adding the offset
7413 last, so that iv_uses that differ only in constant offset can be CSEed. */
7414 poly_widest_int offset = aff_inv.offset;
7415 aff_inv.offset = 0;
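/* For instance, uses like a[i] and a[i + 1] then share the computation of
   the common part &a + i * step, and only the trailing constant offsets
   (0 and one element size) differ.  */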
7417 gimple_seq stmt_list = NULL, seq = NULL;
7418 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7419 tree comp_op2 = aff_combination_to_tree (&aff_var);
7420 gcc_assert (comp_op1 && comp_op2);
7422 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7423 gimple_seq_add_seq (&stmt_list, seq);
7424 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7425 gimple_seq_add_seq (&stmt_list, seq);
7427 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7428 std::swap (comp_op1, comp_op2);
7430 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7432 comp = fold_build_pointer_plus (comp_op1,
7433 fold_convert (sizetype, comp_op2));
7434 comp = fold_build_pointer_plus (comp,
7435 wide_int_to_tree (sizetype, offset));
7437 else
7439 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7440 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7441 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7442 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7445 comp = fold_convert (type, comp);
7446 comp = force_gimple_operand (comp, &seq, false, NULL);
7447 gimple_seq_add_seq (&stmt_list, seq);
7448 if (gimple_code (use->stmt) != GIMPLE_PHI
7449 /* We can't allow re-allocating the stmt as it might be pointed
7450 to still. */
7451 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7452 >= gimple_num_ops (gsi_stmt (bsi))))
7454 comp = force_gimple_operand (comp, &seq, true, NULL);
7455 gimple_seq_add_seq (&stmt_list, seq);
7456 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7458 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7459 /* As this isn't a plain copy we have to reset alignment
7460 information. */
7461 if (SSA_NAME_PTR_INFO (comp))
7462 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7466 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7467 if (gimple_code (use->stmt) == GIMPLE_PHI)
7469 ass = gimple_build_assign (tgt, comp);
7470 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7472 bsi = gsi_for_stmt (use->stmt);
7473 remove_phi_node (&bsi, false);
7475 else
7477 gimple_assign_set_rhs_from_tree (&bsi, comp);
7478 use->stmt = gsi_stmt (bsi);
7482 /* Performs a peephole optimization to reorder the iv update statement with
7483 a mem ref to enable instruction combining in later phases. The mem ref uses
7484 the iv value before the update, so the reordering transformation requires
7485 adjustment of the offset. CAND is the selected IV_CAND.
7487 Example:
7489 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7490 iv2 = iv1 + 1;
7492 if (t < val) (1)
7493 goto L;
7494 goto Head;
7497 directly propagating t over to (1) will introduce an overlapping live range
7498 and thus increase register pressure. This peephole transforms it into:
7501 iv2 = iv1 + 1;
7502 t = MEM_REF (base, iv2, 8, 8);
7503 if (t < val)
7504 goto L;
7505 goto Head;
7508 static void
7509 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7511 tree var_after;
7512 gimple *iv_update, *stmt;
7513 basic_block bb;
7514 gimple_stmt_iterator gsi, gsi_iv;
7516 if (cand->pos != IP_NORMAL)
7517 return;
7519 var_after = cand->var_after;
7520 iv_update = SSA_NAME_DEF_STMT (var_after);
7522 bb = gimple_bb (iv_update);
7523 gsi = gsi_last_nondebug_bb (bb);
7524 stmt = gsi_stmt (gsi);
7526 /* Only handle a conditional statement for now. */
7527 if (gimple_code (stmt) != GIMPLE_COND)
7528 return;
7530 gsi_prev_nondebug (&gsi);
7531 stmt = gsi_stmt (gsi);
7532 if (stmt != iv_update)
7533 return;
7535 gsi_prev_nondebug (&gsi);
7536 if (gsi_end_p (gsi))
7537 return;
7539 stmt = gsi_stmt (gsi);
7540 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7541 return;
7543 if (stmt != use->stmt)
7544 return;
7546 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7547 return;
7549 if (dump_file && (dump_flags & TDF_DETAILS))
7551 fprintf (dump_file, "Reordering \n");
7552 print_gimple_stmt (dump_file, iv_update, 0);
7553 print_gimple_stmt (dump_file, use->stmt, 0);
7554 fprintf (dump_file, "\n");
7557 gsi = gsi_for_stmt (use->stmt);
7558 gsi_iv = gsi_for_stmt (iv_update);
7559 gsi_move_before (&gsi_iv, &gsi);
7561 cand->pos = IP_BEFORE_USE;
7562 cand->incremented_at = use->stmt;
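/* Marking the candidate IP_BEFORE_USE here is what makes the later rewrite
   of USE pick up the already-incremented value, which is where the offset
   adjustment described in the comment above falls out.  */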
7565 /* Return the alias pointer type that should be used for a MEM_REF
7566 associated with USE, which has type USE_PTR_ADDRESS. */
7568 static tree
7569 get_alias_ptr_type_for_ptr_address (iv_use *use)
7571 gcall *call = as_a <gcall *> (use->stmt);
7572 switch (gimple_call_internal_fn (call))
7574 case IFN_MASK_LOAD:
7575 case IFN_MASK_STORE:
7576 case IFN_MASK_LOAD_LANES:
7577 case IFN_MASK_STORE_LANES:
7578 case IFN_MASK_LEN_LOAD_LANES:
7579 case IFN_MASK_LEN_STORE_LANES:
7580 case IFN_LEN_LOAD:
7581 case IFN_LEN_STORE:
7582 case IFN_MASK_LEN_LOAD:
7583 case IFN_MASK_LEN_STORE:
7584 /* The second argument contains the correct alias type. */
7585 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7586 return TREE_TYPE (gimple_call_arg (call, 1));
7588 default:
7589 gcc_unreachable ();
7594 /* Rewrites USE (address that is an iv) using candidate CAND. */
7596 static void
7597 rewrite_use_address (struct ivopts_data *data,
7598 struct iv_use *use, struct iv_cand *cand)
7600 aff_tree aff;
7601 bool ok;
7603 adjust_iv_update_pos (cand, use);
7604 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7605 gcc_assert (ok);
7606 unshare_aff_combination (&aff);
7608 /* To avoid undefined overflow problems, all IV candidates use unsigned
7609 integer types. The drawback is that this makes it impossible for
7610 create_mem_ref to distinguish an IV that is based on a memory object
7611 from one that represents simply an offset.
7613 To work around this problem, we pass a hint to create_mem_ref that
7614 indicates which variable (if any) in aff is an IV based on a memory
7615 object. Note that we only consider the candidate. If this is not
7616 based on an object, the base of the reference is in some subexpression
7617 of the use -- but these will use pointer types, so they are recognized
7618 by the create_mem_ref heuristics anyway. */
7619 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7620 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7621 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7622 tree type = use->mem_type;
7623 tree alias_ptr_type;
7624 if (use->type == USE_PTR_ADDRESS)
7625 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7626 else
7628 gcc_assert (type == TREE_TYPE (*use->op_p));
7629 unsigned int align = get_object_alignment (*use->op_p);
7630 if (align != TYPE_ALIGN (type))
7631 type = build_aligned_type (type, align);
7632 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7634 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7635 iv, base_hint, data->speed);
7637 if (use->type == USE_PTR_ADDRESS)
7639 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7640 ref = fold_convert (get_use_type (use), ref);
7641 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7642 true, GSI_SAME_STMT);
7644 else
7646 /* When we end up confused enough to have no suitable base and have
7647 stuffed everything into index2, use a LEA for the address and
7648 create a plain MEM_REF, to avoid basing a memory reference
7649 on address zero, which create_mem_ref_raw does as a fallback. */
7650 if (TREE_CODE (ref) == TARGET_MEM_REF
7651 && TMR_INDEX2 (ref) != NULL_TREE
7652 && integer_zerop (TREE_OPERAND (ref, 0)))
7654 ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7655 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7656 true, GSI_SAME_STMT);
7657 ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7659 copy_ref_info (ref, *use->op_p);
7662 *use->op_p = ref;
7665 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7666 candidate CAND. */
7668 static void
7669 rewrite_use_compare (struct ivopts_data *data,
7670 struct iv_use *use, struct iv_cand *cand)
7672 tree comp, op, bound;
7673 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7674 enum tree_code compare;
7675 struct iv_group *group = data->vgroups[use->group_id];
7676 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7678 bound = cp->value;
7679 if (bound)
7681 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7682 tree var_type = TREE_TYPE (var);
7683 gimple_seq stmts;
7685 if (dump_file && (dump_flags & TDF_DETAILS))
7687 fprintf (dump_file, "Replacing exit test: ");
7688 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7690 compare = cp->comp;
7691 bound = unshare_expr (fold_convert (var_type, bound));
7692 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7693 if (stmts)
7694 gsi_insert_seq_on_edge_immediate (
7695 loop_preheader_edge (data->current_loop),
7696 stmts);
7698 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7699 gimple_cond_set_lhs (cond_stmt, var);
7700 gimple_cond_set_code (cond_stmt, compare);
7701 gimple_cond_set_rhs (cond_stmt, op);
7702 return;
7705 /* The induction variable elimination failed; just express the original
7706 giv. */
7707 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7708 gcc_assert (comp != NULL_TREE);
7709 gcc_assert (use->op_p != NULL);
7710 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7711 SSA_NAME_VAR (*use->op_p),
7712 true, GSI_SAME_STMT);
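/* Illustrative effect of the BOUND path above: an exit test like
   "if (i < n)" can become "if (p != base + n * step)" when the selected
   candidate is a pointer iv, removing the last use of the original counter
   so that remove_unused_ivs can then delete it.  */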
7715 /* Rewrite the groups using the selected induction variables. */
7717 static void
7718 rewrite_groups (struct ivopts_data *data)
7720 unsigned i, j;
7722 for (i = 0; i < data->vgroups.length (); i++)
7724 struct iv_group *group = data->vgroups[i];
7725 struct iv_cand *cand = group->selected;
7727 gcc_assert (cand);
7729 if (group->type == USE_NONLINEAR_EXPR)
7731 for (j = 0; j < group->vuses.length (); j++)
7733 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7734 update_stmt (group->vuses[j]->stmt);
7737 else if (address_p (group->type))
7739 for (j = 0; j < group->vuses.length (); j++)
7741 rewrite_use_address (data, group->vuses[j], cand);
7742 update_stmt (group->vuses[j]->stmt);
7745 else
7747 gcc_assert (group->type == USE_COMPARE);
7749 for (j = 0; j < group->vuses.length (); j++)
7751 rewrite_use_compare (data, group->vuses[j], cand);
7752 update_stmt (group->vuses[j]->stmt);
7758 /* Removes the ivs that are not used after rewriting. */
7760 static void
7761 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7763 unsigned j;
7764 bitmap_iterator bi;
7766 /* Figure out an order in which to release SSA DEFs so that we don't
7767 release something that we'd have to propagate into a debug stmt
7768 afterwards. */
7769 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7771 struct version_info *info;
7773 info = ver_info (data, j);
7774 if (info->iv
7775 && !integer_zerop (info->iv->step)
7776 && !info->inv_id
7777 && !info->iv->nonlin_use
7778 && !info->preserve_biv)
7780 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7782 tree def = info->iv->ssa_name;
7784 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7786 imm_use_iterator imm_iter;
7787 use_operand_p use_p;
7788 gimple *stmt;
7789 int count = 0;
7791 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7793 if (!gimple_debug_bind_p (stmt))
7794 continue;
7796 /* We just want to determine whether to do nothing
7797 (count == 0), to substitute the computed
7798 expression into a single use of the SSA DEF by
7799 itself (count == 1), or to use a debug temp
7800 because the SSA DEF is used multiple times or as
7801 part of a larger expression (count > 1). */
7802 count++;
7803 if (gimple_debug_bind_get_value (stmt) != def)
7804 count++;
7806 if (count > 1)
7807 break;
7810 if (!count)
7811 continue;
7813 struct iv_use dummy_use;
7814 struct iv_cand *best_cand = NULL, *cand;
7815 unsigned i, best_pref = 0, cand_pref;
7816 tree comp = NULL_TREE;
7818 memset (&dummy_use, 0, sizeof (dummy_use));
7819 dummy_use.iv = info->iv;
7820 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7822 cand = data->vgroups[i]->selected;
7823 if (cand == best_cand)
7824 continue;
7825 cand_pref = operand_equal_p (cand->iv->step,
7826 info->iv->step, 0)
7827 ? 4 : 0;
7828 cand_pref
7829 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7830 == TYPE_MODE (TREE_TYPE (info->iv->base))
7831 ? 2 : 0;
7832 cand_pref
7833 += TREE_CODE (cand->iv->base) == INTEGER_CST
7834 ? 1 : 0;
7835 if (best_cand == NULL || best_pref < cand_pref)
7837 tree this_comp
7838 = get_debug_computation_at (data->current_loop,
7839 SSA_NAME_DEF_STMT (def),
7840 &dummy_use, cand);
7841 if (this_comp)
7843 best_cand = cand;
7844 best_pref = cand_pref;
7845 comp = this_comp;
7850 if (!best_cand)
7851 continue;
7853 comp = unshare_expr (comp);
7854 if (count > 1)
7856 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7857 /* FIXME: Is setting the mode really necessary? */
7858 if (SSA_NAME_VAR (def))
7859 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7860 else
7861 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7862 gdebug *def_temp
7863 = gimple_build_debug_bind (vexpr, comp, NULL);
7864 gimple_stmt_iterator gsi;
7866 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7867 gsi = gsi_after_labels (gimple_bb
7868 (SSA_NAME_DEF_STMT (def)));
7869 else
7870 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7872 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7873 comp = vexpr;
7876 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7878 if (!gimple_debug_bind_p (stmt))
7879 continue;
7881 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7882 SET_USE (use_p, comp);
7884 update_stmt (stmt);
7891 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7892 for hash_map::traverse. */
7894 bool
7895 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7897 free (value);
7898 return true;

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
        free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
        {
          if (group->cost_map[j].inv_vars)
            BITMAP_FREE (group->cost_map[j].inv_vars);
          if (group->cost_map[j].inv_exprs)
            BITMAP_FREE (group->cost_map[j].inv_exprs);
        }

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
        BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
        BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);
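
  /* Processing a loop may have created new SSA names, so version_info may
     now be too small; reallocate it (zero-initialized) with some headroom
     for the following loops.  */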
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  if (data->base_object_map)
    delete data->base_object_map;
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES basic blocks,
   includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
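        /* Internal function calls and inexpensive builtins are not counted;
           only a real call matters here, since it makes the call-clobbered
           registers unavailable to the loop body (see
           ivopts_estimate_reg_pressure).  */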
        if (is_gimple_call (stmt)
            && !gimple_call_internal_p (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Determine cost scaling factor for basic blocks in loop.  */
#define COST_SCALING_FACTOR_BOUND (20)

static void
determine_scaling_factor (struct ivopts_data *data, basic_block *body)
{
  int lfreq = data->current_loop->header->count.to_frequency (cfun);
  if (!data->speed || lfreq <= 0)
    return;

  int max_freq = lfreq;
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    {
      body[i]->aux = (void *)(intptr_t) 1;
      if (max_freq < body[i]->count.to_frequency (cfun))
        max_freq = body[i]->count.to_frequency (cfun);
    }
  if (max_freq > lfreq)
    {
      int divisor, factor;
      /* Check if scaling factor itself needs to be scaled by the bound.  This
         is to avoid overflow when scaling cost according to profile info.  */
      if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
        {
          divisor = max_freq;
          factor = COST_SCALING_FACTOR_BOUND;
        }
      else
        {
          divisor = lfreq;
          factor = 1;
        }
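
      /* Scale each block that is hotter than the loop header by its
         frequency relative to the header, using the DIVISOR/FACTOR pair
         chosen above so the factor never exceeds COST_SCALING_FACTOR_BOUND.
         The per-block factor is stashed in the aux field and applied when
         statement costs in that block are computed.  */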
      for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
        {
          int bfreq = body[i]->count.to_frequency (cfun);
          if (bfreq <= lfreq)
            continue;

          body[i]->aux = (void *)(intptr_t) (factor * bfreq / divisor);
        }
    }
}

/* Find the doloop comparison IV use and set its doloop_p flag if found.  */

static bool
find_doloop_use (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;

  for (unsigned i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      if (group->type == USE_COMPARE)
        {
          gcc_assert (group->vuses.length () == 1);
          struct iv_use *use = group->vuses[0];
          gimple *stmt = use->stmt;
          if (gimple_code (stmt) == GIMPLE_COND)
            {
              basic_block bb = gimple_bb (stmt);
              edge true_edge, false_edge;
              extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
              /* This comparison branches to the loop latch.  Require the
                 latch to be empty for now.  */
              if ((loop->latch == true_edge->dest
                   || loop->latch == false_edge->dest)
                  && empty_block_p (loop->latch))
                {
                  group->doloop_p = true;
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Doloop cmp iv use: ");
                      print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
                    }
                  return true;
                }
            }
        }
    }

  return false;
}

/* For targets that support doloop, predict whether the later RTL doloop
   transformation will be performed on this loop; if so, detect the doloop
   compare use and set the doloop_use_p flag.  */

void
analyze_and_mark_doloop_use (struct ivopts_data *data)
{
  data->doloop_use_p = false;
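
  /* The RTL doloop transformation is only run when -fbranch-on-count-reg
     is enabled, so there is nothing to predict otherwise.  */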
  if (!flag_branch_on_count_reg)
    return;

  if (data->current_loop->unroll == USHRT_MAX)
    return;

  if (!generic_predict_doloop_p (data))
    return;

  if (find_doloop_use (data))
    {
      data->doloop_use_p = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          struct loop *loop = data->current_loop;
          fprintf (dump_file,
                   "Predict loop %d can perform"
                   " doloop optimization later.\n",
                   loop->num);
          flow_loop_dump (loop, dump_file, NULL, 1);
        }
    }
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
                           bitmap toremove)
{
  bool changed = false;
  class iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
                             0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);

  data->loop_single_exit_p
    = exit != NULL && loop_only_exit_p (loop, body, exit);

  /* For each SSA name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data, body))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data, body);
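  /* Give up if there are too many use groups; MAX_CONSIDERED_GROUPS bounds
     the amount of work the candidate selection is allowed to do.  */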
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Determine cost scaling factor for basic blocks in loop.  */
  determine_scaling_factor (data, body);

  /* Analyze doloop possibility and mark the doloop use if predicted.  */
  analyze_and_mark_doloop_use (data);

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  /* Cleanup basic block aux field.  */
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    body[i]->aux = NULL;
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free (body);
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct ivopts_data data;
  auto_bitmap toremove;
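
  /* TOREMOVE accumulates, across all loops, the SSA names of induction
     variables that became unused; their definitions are released in one
     batch after the loop below.  */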
  tree_ssa_iv_optimize_init (&data);
  mark_ssa_maybe_undefs ();

  /* Optimize the loops starting with the innermost ones.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (!dbg_cnt (ivopts_loop))
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"