gcc/tree-ssa-loop-ivopts.cc

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ by a constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.
  68
  69    For the targets supporting low-overhead loops, IVOPTs has to take care of
  70    the loops which will probably be transformed in RTL doloop optimization,
  71    to try to make selected IV candidate set optimal.  The process of doloop
  72    support includes:
  73
   1) Analyze whether the current loop will be transformed to a doloop, find
      and mark its compare type IV use as doloop use (iv_group field doloop_p),
      and set flag doloop_use_p of ivopts_data to notify subsequent processing
      of the doloop.  See analyze_and_mark_doloop_use and its callees for the
      details.  The target hook predict_doloop_p can be used for target
      specific checks.
  79
  80    2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
  81       set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
  82       like biv.  For cost determination between doloop IV cand and IV use, the
  83       target hooks doloop_cost_for_generic and doloop_cost_for_address are
  84       provided to add on extra costs for generic type and address type IV use.
  85       Zero cost is assigned to the pair between doloop IV cand and doloop IV
  86       use, and bound zero is set for IV elimination.
  87
  88    3) With the cost setting in step 2), the current cost model based IV
  89       selection algorithm will process as usual, pick up doloop dedicated IV if
  90       profitable.  */
  91
  92 #include "config.h"
  93 #include "system.h"
  94 #include "coretypes.h"
  95 #include "backend.h"
  96 #include "rtl.h"
  97 #include "tree.h"
  98 #include "gimple.h"
  99 #include "cfghooks.h"
 100 #include "tree-pass.h"
 101 #include "memmodel.h"
 102 #include "tm_p.h"
 103 #include "ssa.h"
 104 #include "expmed.h"
 105 #include "insn-config.h"
 106 #include "emit-rtl.h"
 107 #include "recog.h"
 108 #include "cgraph.h"
 109 #include "gimple-pretty-print.h"
 110 #include "alias.h"
 111 #include "fold-const.h"
 112 #include "stor-layout.h"
 113 #include "tree-eh.h"
 114 #include "gimplify.h"
 115 #include "gimple-iterator.h"
 116 #include "gimplify-me.h"
 117 #include "tree-cfg.h"
 118 #include "tree-ssa-loop-ivopts.h"
 119 #include "tree-ssa-loop-manip.h"
 120 #include "tree-ssa-loop-niter.h"
 121 #include "tree-ssa-loop.h"
 122 #include "explow.h"
 123 #include "expr.h"
 124 #include "tree-dfa.h"
 125 #include "tree-ssa.h"
 126 #include "cfgloop.h"
 127 #include "tree-scalar-evolution.h"
 128 #include "tree-affine.h"
 129 #include "tree-ssa-propagate.h"
 130 #include "tree-ssa-address.h"
 131 #include "builtins.h"
 132 #include "tree-vectorizer.h"
 133 #include "dbgcnt.h"
 134 #include "cfganal.h"
 135
 136 /* For lang_hooks.types.type_for_mode.  */
 137 #include "langhooks.h"
 138
 139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 140    cost of different addressing modes.  This should be moved to a TBD
 141    interface between the GIMPLE and RTL worlds.  */
 142
 143 /* The infinite cost.  */
 144 #define INFTY 1000000000
 145
 146 /* Returns the expected number of loop iterations for LOOP.
 147    The average trip count is computed from profile data if it
 148    exists. */
 149
 150 static inline HOST_WIDE_INT
 151 avg_loop_niter (class loop *loop)
 152 {
 153   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 154   if (niter == -1)
 155     {
 156       niter = likely_max_stmt_executions_int (loop);
 157
 158       if (niter == -1 || niter > param_avg_loop_niter)
 159         return param_avg_loop_niter;
 160     }
 161
 162   return niter;
 163 }
 164
 165 struct iv_use;
 166
 167 /* Representation of the induction variable.  */
 168 struct iv
 169 {
 170   tree base;            /* Initial value of the iv.  */
 171   tree base_object;     /* A memory object to that the induction variable points.  */
 172   tree step;            /* Step of the iv (constant only).  */
 173   tree ssa_name;        /* The ssa name with the value.  */
 174   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 175   bool biv_p;           /* Is it a biv?  */
 176   bool no_overflow;     /* True if the iv doesn't overflow.  */
 177   bool have_address_use;/* For biv, indicate if it's used in any address
 178                            type use.  */
 179 };
 180
 181 /* Per-ssa version information (induction variable descriptions, etc.).  */
 182 struct version_info
 183 {
 184   tree name;            /* The ssa name.  */
 185   struct iv *iv;        /* Induction variable description.  */
 186   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 187                            an expression that is not an induction variable.  */
 188   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 189   unsigned inv_id;      /* Id of an invariant.  */
 190 };
 191
 192 /* Types of uses.  */
 193 enum use_type
 194 {
 195   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 196   USE_REF_ADDRESS,      /* Use is an address for an explicit memory
 197                            reference.  */
 198   USE_PTR_ADDRESS,      /* Use is a pointer argument to a function in
 199                            cases where the expansion of the function
 200                            will turn the argument into a normal address.  */
 201   USE_COMPARE           /* Use is a compare.  */
 202 };
 203
 204 /* Cost of a computation.  */
 205 class comp_cost
 206 {
 207 public:
 208   comp_cost (): cost (0), complexity (0), scratch (0)
 209   {}
 210
 211   comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
 212     : cost (cost), complexity (complexity), scratch (scratch)
 213   {}
 214
 215   /* Returns true if COST is infinite.  */
 216   bool infinite_cost_p ();
 217
 218   /* Adds costs COST1 and COST2.  */
 219   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 220
 221   /* Adds COST to the comp_cost.  */
 222   comp_cost operator+= (comp_cost cost);
 223
 224   /* Adds constant C to this comp_cost.  */
 225   comp_cost operator+= (HOST_WIDE_INT c);
 226
 227   /* Subtracts constant C to this comp_cost.  */
 228   comp_cost operator-= (HOST_WIDE_INT c);
 229
 230   /* Divide the comp_cost by constant C.  */
 231   comp_cost operator/= (HOST_WIDE_INT c);
 232
 233   /* Multiply the comp_cost by constant C.  */
 234   comp_cost operator*= (HOST_WIDE_INT c);
 235
 236   /* Subtracts costs COST1 and COST2.  */
 237   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 238
 239   /* Subtracts COST from this comp_cost.  */
 240   comp_cost operator-= (comp_cost cost);
 241
 242   /* Returns true if COST1 is smaller than COST2.  */
 243   friend bool operator< (comp_cost cost1, comp_cost cost2);
 244
 245   /* Returns true if COST1 and COST2 are equal.  */
 246   friend bool operator== (comp_cost cost1, comp_cost cost2);
 247
 248   /* Returns true if COST1 is smaller or equal than COST2.  */
 249   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 250
 251   int64_t cost;         /* The runtime cost.  */
 252   unsigned complexity;  /* The estimate of the complexity of the code for
 253                            the computation (in no concrete units --
 254                            complexity field should be larger for more
 255                            complex expressions and addressing modes).  */
 256   int64_t scratch;      /* Scratch used during cost computation.  */
 257 };
 258
/* The default-constructed cost: cost, complexity and scratch are all
   zero.  */
static const comp_cost no_cost;
/* The infinite cost: cost and scratch are INFTY, complexity is zero.  */
static const comp_cost infinite_cost (INFTY, 0, INFTY);
 261
 262 bool
 263 comp_cost::infinite_cost_p ()
 264 {
 265   return cost == INFTY;
 266 }
 267
 268 comp_cost
 269 operator+ (comp_cost cost1, comp_cost cost2)
 270 {
 271   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 272     return infinite_cost;
 273
 274   gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
 275   cost1.cost += cost2.cost;
 276   cost1.complexity += cost2.complexity;
 277
 278   return cost1;
 279 }
 280
 281 comp_cost
 282 operator- (comp_cost cost1, comp_cost cost2)
 283 {
 284   if (cost1.infinite_cost_p ())
 285     return infinite_cost;
 286
 287   gcc_assert (!cost2.infinite_cost_p ());
 288   gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
 289
 290   cost1.cost -= cost2.cost;
 291   cost1.complexity -= cost2.complexity;
 292
 293   return cost1;
 294 }
 295
 296 comp_cost
 297 comp_cost::operator+= (comp_cost cost)
 298 {
 299   *this = *this + cost;
 300   return *this;
 301 }
 302
 303 comp_cost
 304 comp_cost::operator+= (HOST_WIDE_INT c)
 305 {
 306   if (c >= INFTY)
 307     this->cost = INFTY;
 308
 309   if (infinite_cost_p ())
 310     return *this;
 311
 312   gcc_assert (this->cost + c < infinite_cost.cost);
 313   this->cost += c;
 314
 315   return *this;
 316 }
 317
 318 comp_cost
 319 comp_cost::operator-= (HOST_WIDE_INT c)
 320 {
 321   if (infinite_cost_p ())
 322     return *this;
 323
 324   gcc_assert (this->cost - c < infinite_cost.cost);
 325   this->cost -= c;
 326
 327   return *this;
 328 }
 329
 330 comp_cost
 331 comp_cost::operator/= (HOST_WIDE_INT c)
 332 {
 333   gcc_assert (c != 0);
 334   if (infinite_cost_p ())
 335     return *this;
 336
 337   this->cost /= c;
 338
 339   return *this;
 340 }
 341
 342 comp_cost
 343 comp_cost::operator*= (HOST_WIDE_INT c)
 344 {
 345   if (infinite_cost_p ())
 346     return *this;
 347
 348   gcc_assert (this->cost * c < infinite_cost.cost);
 349   this->cost *= c;
 350
 351   return *this;
 352 }
 353
 354 comp_cost
 355 comp_cost::operator-= (comp_cost cost)
 356 {
 357   *this = *this - cost;
 358   return *this;
 359 }
 360
 361 bool
 362 operator< (comp_cost cost1, comp_cost cost2)
 363 {
 364   if (cost1.cost == cost2.cost)
 365     return cost1.complexity < cost2.complexity;
 366
 367   return cost1.cost < cost2.cost;
 368 }
 369
 370 bool
 371 operator== (comp_cost cost1, comp_cost cost2)
 372 {
 373   return cost1.cost == cost2.cost
 374     && cost1.complexity == cost2.complexity;
 375 }
 376
 377 bool
 378 operator<= (comp_cost cost1, comp_cost cost2)
 379 {
 380   return cost1 < cost2 || cost1 == cost2;
 381 }
 382
 383 struct iv_inv_expr_ent;
 384
 385 /* The candidate - cost pair.  */
 386 class cost_pair
 387 {
 388 public:
 389   struct iv_cand *cand; /* The candidate.  */
 390   comp_cost cost;       /* The cost.  */
 391   enum tree_code comp;  /* For iv elimination, the comparison.  */
 392   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 393                            preserved when representing iv_use with iv_cand.  */
 394   bitmap inv_exprs;     /* The list of newly created invariant expressions
 395                            when representing iv_use with iv_cand.  */
 396   tree value;           /* For final value elimination, the expression for
 397                            the final value of the iv.  For iv elimination,
 398                            the new bound to compare with.  */
 399 };
 400
 401 /* Use.  */
 402 struct iv_use
 403 {
 404   unsigned id;          /* The id of the use.  */
 405   unsigned group_id;    /* The group id the use belongs to.  */
 406   enum use_type type;   /* Type of the use.  */
 407   tree mem_type;        /* The memory type to use when testing whether an
 408                            address is legitimate, and what the address's
 409                            cost is.  */
 410   struct iv *iv;        /* The induction variable it is based on.  */
 411   gimple *stmt;         /* Statement in that it occurs.  */
 412   tree *op_p;           /* The place where it occurs.  */
 413
 414   tree addr_base;       /* Base address with const offset stripped.  */
 415   poly_uint64 addr_offset;
 416                         /* Const offset stripped from base address.  */
 417 };
 418
 419 /* Group of uses.  */
 420 struct iv_group
 421 {
 422   /* The id of the group.  */
 423   unsigned id;
 424   /* Uses of the group are of the same type.  */
 425   enum use_type type;
 426   /* The set of "related" IV candidates, plus the important ones.  */
 427   bitmap related_cands;
 428   /* Number of IV candidates in the cost_map.  */
 429   unsigned n_map_members;
 430   /* The costs wrto the iv candidates.  */
 431   class cost_pair *cost_map;
 432   /* The selected candidate for the group.  */
 433   struct iv_cand *selected;
 434   /* To indicate this is a doloop use group.  */
 435   bool doloop_p;
 436   /* Uses in the group.  */
 437   vec<struct iv_use *> vuses;
 438 };
 439
 440 /* The position where the iv is computed.  */
 441 enum iv_position
 442 {
 443   IP_NORMAL,            /* At the end, just before the exit condition.  */
 444   IP_END,               /* At the end of the latch block.  */
 445   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 446   IP_AFTER_USE,         /* Immediately after a specific use.  */
 447   IP_ORIGINAL           /* The original biv.  */
 448 };
 449
 450 /* The induction variable candidate.  */
 451 struct iv_cand
 452 {
 453   unsigned id;          /* The number of the candidate.  */
 454   bool important;       /* Whether this is an "important" candidate, i.e. such
 455                            that it should be considered by all uses.  */
 456   bool involves_undefs; /* Whether the IV involves undefined values.  */
 457   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 458   gimple *incremented_at;/* For original biv, the statement where it is
 459                            incremented.  */
 460   tree var_before;      /* The variable used for it before increment.  */
 461   tree var_after;       /* The variable used for it after increment.  */
 462   struct iv *iv;        /* The value of the candidate.  NULL for
 463                            "pseudocandidate" used to indicate the possibility
 464                            to replace the final value of an iv by direct
 465                            computation of the value.  */
 466   unsigned cost;        /* Cost of the candidate.  */
 467   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 468   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 469                               where it is incremented.  */
 470   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 471                            iv_cand.  */
 472   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 473                            handle it as a new invariant expression which will
 474                            be hoisted out of loop.  */
 475   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 476                            smaller type.  */
 477   bool doloop_p;        /* Whether this is a doloop candidate.  */
 478 };
 479
 480 /* Hashtable entry for common candidate derived from iv uses.  */
 481 class iv_common_cand
 482 {
 483 public:
 484   tree base;
 485   tree step;
 486   /* IV uses from which this common candidate is derived.  */
 487   auto_vec<struct iv_use *> uses;
 488   hashval_t hash;
 489 };
 490
 491 /* Hashtable helpers.  */
 492
 493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 494 {
 495   static inline hashval_t hash (const iv_common_cand *);
 496   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 497 };
 498
 499 /* Hash function for possible common candidates.  */
 500
 501 inline hashval_t
 502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 503 {
 504   return ccand->hash;
 505 }
 506
 507 /* Hash table equality function for common candidates.  */
 508
 509 inline bool
 510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 511                               const iv_common_cand *ccand2)
 512 {
 513   return (ccand1->hash == ccand2->hash
 514           && operand_equal_p (ccand1->base, ccand2->base, 0)
 515           && operand_equal_p (ccand1->step, ccand2->step, 0)
 516           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 517               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 518 }
 519
 520 /* Loop invariant expression hashtable entry.  */
 521
 522 struct iv_inv_expr_ent
 523 {
 524   /* Tree expression of the entry.  */
 525   tree expr;
 526   /* Unique indentifier.  */
 527   int id;
 528   /* Hash value.  */
 529   hashval_t hash;
 530 };
 531
 532 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 533
 534 static int
 535 sort_iv_inv_expr_ent (const void *a, const void *b)
 536 {
 537   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 538   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 539
 540   unsigned id1 = (*e1)->id;
 541   unsigned id2 = (*e2)->id;
 542
 543   if (id1 < id2)
 544     return -1;
 545   else if (id1 > id2)
 546     return 1;
 547   else
 548     return 0;
 549 }
 550
 551 /* Hashtable helpers.  */
 552
 553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 554 {
 555   static inline hashval_t hash (const iv_inv_expr_ent *);
 556   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 557 };
 558
 559 /* Return true if uses of type TYPE represent some form of address.  */
 560
 561 inline bool
 562 address_p (use_type type)
 563 {
 564   return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
 565 }
 566
 567 /* Hash function for loop invariant expressions.  */
 568
 569 inline hashval_t
 570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 571 {
 572   return expr->hash;
 573 }
 574
 575 /* Hash table equality function for expressions.  */
 576
 577 inline bool
 578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 579                            const iv_inv_expr_ent *expr2)
 580 {
 581   return expr1->hash == expr2->hash
 582          && operand_equal_p (expr1->expr, expr2->expr, 0);
 583 }
 584
 585 struct ivopts_data
 586 {
 587   /* The currently optimized loop.  */
 588   class loop *current_loop;
 589   location_t loop_loc;
 590
 591   /* Numbers of iterations for all exits of the current loop.  */
 592   hash_map<edge, tree_niter_desc *> *niters;
 593
 594   /* Number of registers used in it.  */
 595   unsigned regs_used;
 596
 597   /* The size of version_info array allocated.  */
 598   unsigned version_info_size;
 599
 600   /* The array of information for the ssa names.  */
 601   struct version_info *version_info;
 602
 603   /* The hashtable of loop invariant expressions created
 604      by ivopt.  */
 605   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 606
 607   /* The bitmap of indices in version_info whose value was changed.  */
 608   bitmap relevant;
 609
 610   /* The uses of induction variables.  */
 611   vec<iv_group *> vgroups;
 612
 613   /* The candidates.  */
 614   vec<iv_cand *> vcands;
 615
 616   /* A bitmap of important candidates.  */
 617   bitmap important_candidates;
 618
 619   /* Cache used by tree_to_aff_combination_expand.  */
 620   hash_map<tree, name_expansion *> *name_expansion_cache;
 621
 622   /* The hashtable of common candidates derived from iv uses.  */
 623   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 624
 625   /* The common candidates.  */
 626   vec<iv_common_cand *> iv_common_cands;
 627
 628   /* Hash map recording base object information of tree exp.  */
 629   hash_map<tree, tree> *base_object_map;
 630
 631   /* The maximum invariant variable id.  */
 632   unsigned max_inv_var_id;
 633
 634   /* The maximum invariant expression id.  */
 635   unsigned max_inv_expr_id;
 636
 637   /* Number of no_overflow BIVs which are not used in memory address.  */
 638   unsigned bivs_not_used_in_addr;
 639
 640   /* Obstack for iv structure.  */
 641   struct obstack iv_obstack;
 642
 643   /* Whether to consider just related and important candidates when replacing a
 644      use.  */
 645   bool consider_all_candidates;
 646
 647   /* Are we optimizing for speed?  */
 648   bool speed;
 649
 650   /* Whether the loop body includes any function calls.  */
 651   bool body_includes_call;
 652
 653   /* Whether the loop body can only be exited via single exit.  */
 654   bool loop_single_exit_p;
 655
 656   /* Whether the loop has doloop comparison use.  */
 657   bool doloop_use_p;
 658 };
 659
 660 /* An assignment of iv candidates to uses.  */
 661
 662 class iv_ca
 663 {
 664 public:
 665   /* The number of uses covered by the assignment.  */
 666   unsigned upto;
 667
 668   /* Number of uses that cannot be expressed by the candidates in the set.  */
 669   unsigned bad_groups;
 670
 671   /* Candidate assigned to a use, together with the related costs.  */
 672   class cost_pair **cand_for_group;
 673
 674   /* Number of times each candidate is used.  */
 675   unsigned *n_cand_uses;
 676
 677   /* The candidates used.  */
 678   bitmap cands;
 679
 680   /* The number of candidates in the set.  */
 681   unsigned n_cands;
 682
 683   /* The number of invariants needed, including both invariant variants and
 684      invariant expressions.  */
 685   unsigned n_invs;
 686
 687   /* Total cost of expressing uses.  */
 688   comp_cost cand_use_cost;
 689
 690   /* Total cost of candidates.  */
 691   int64_t cand_cost;
 692
 693   /* Number of times each invariant variable is used.  */
 694   unsigned *n_inv_var_uses;
 695
 696   /* Number of times each invariant expression is used.  */
 697   unsigned *n_inv_expr_uses;
 698
 699   /* Total cost of the assignment.  */
 700   comp_cost cost;
 701 };
 702
 703 /* Difference of two iv candidate assignments.  */
 704
 705 struct iv_ca_delta
 706 {
 707   /* Changed group.  */
 708   struct iv_group *group;
 709
 710   /* An old assignment (for rollback purposes).  */
 711   class cost_pair *old_cp;
 712
 713   /* A new assignment.  */
 714   class cost_pair *new_cp;
 715
 716   /* Next change in the list.  */
 717   struct iv_ca_delta *next;
 718 };
 719
/* Bound on the number of candidates below which all candidates are
   considered.  Expands to param_iv_consider_all_candidates_bound cast
   to unsigned.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)

/* If there are more iv occurrences than this, we just give up (it is
   quite unlikely that optimizing such a loop would help, and it would
   take ages).  Expands to param_iv_max_considered_uses cast to
   unsigned.  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)

/* If there are at most this number of ivs in the set, always try
   removing unnecessary ivs from the set.  Expands to
   param_iv_always_prune_cand_set_bound cast to unsigned.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
 736
/* The list of trees for which the decl_rtl field must be reset is
   stored here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; the definition is not in this part of the
   file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
 743
 744 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 745
 746 edge
 747 single_dom_exit (class loop *loop)
 748 {
 749   edge exit = single_exit (loop);
 750
 751   if (!exit)
 752     return NULL;
 753
 754   if (!just_once_each_iteration_p (loop, exit->src))
 755     return NULL;
 756
 757   return exit;
 758 }
 759
 760 /* Dumps information about the induction variable IV to FILE.  Don't dump
 761    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 762    preceding spaces indicated by INDENT_LEVEL.  */
 763
 764 void
 765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 766 {
 767   const char *p;
 768   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 769
 770   if (indent_level > 4)
 771     indent_level = 4;
 772   p = spaces + 8 - (indent_level << 1);
 773
 774   fprintf (file, "%sIV struct:\n", p);
 775   if (iv->ssa_name && dump_name)
 776     {
 777       fprintf (file, "%s  SSA_NAME:\t", p);
 778       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 779       fprintf (file, "\n");
 780     }
 781
 782   fprintf (file, "%s  Type:\t", p);
 783   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 784   fprintf (file, "\n");
 785
 786   fprintf (file, "%s  Base:\t", p);
 787   print_generic_expr (file, iv->base, TDF_SLIM);
 788   fprintf (file, "\n");
 789
 790   fprintf (file, "%s  Step:\t", p);
 791   print_generic_expr (file, iv->step, TDF_SLIM);
 792   fprintf (file, "\n");
 793
 794   if (iv->base_object)
 795     {
 796       fprintf (file, "%s  Object:\t", p);
 797       print_generic_expr (file, iv->base_object, TDF_SLIM);
 798       fprintf (file, "\n");
 799     }
 800
 801   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 802
 803   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 804            p, iv->no_overflow ? "No-overflow" : "Overflow");
 805 }
 806
 807 /* Dumps information about the USE to FILE.  */
 808
 809 void
 810 dump_use (FILE *file, struct iv_use *use)
 811 {
 812   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 813   fprintf (file, "    At stmt:\t");
 814   print_gimple_stmt (file, use->stmt, 0);
 815   fprintf (file, "    At pos:\t");
 816   if (use->op_p)
 817     print_generic_expr (file, *use->op_p, TDF_SLIM);
 818   fprintf (file, "\n");
 819   dump_iv (file, use->iv, false, 2);
 820 }
 821
 822 /* Dumps information about the uses to FILE.  */
 823
 824 void
 825 dump_groups (FILE *file, struct ivopts_data *data)
 826 {
 827   unsigned i, j;
 828   struct iv_group *group;
 829
 830   for (i = 0; i < data->vgroups.length (); i++)
 831     {
 832       group = data->vgroups[i];
 833       fprintf (file, "Group %d:\n", group->id);
 834       if (group->type == USE_NONLINEAR_EXPR)
 835         fprintf (file, "  Type:\tGENERIC\n");
 836       else if (group->type == USE_REF_ADDRESS)
 837         fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
 838       else if (group->type == USE_PTR_ADDRESS)
 839         fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
 840       else
 841         {
 842           gcc_assert (group->type == USE_COMPARE);
 843           fprintf (file, "  Type:\tCOMPARE\n");
 844         }
 845       for (j = 0; j < group->vuses.length (); j++)
 846         dump_use (file, group->vuses[j]);
 847     }
 848 }
 849
 850 /* Dumps information about induction variable candidate CAND to FILE.  */
 851
 852 void
 853 dump_cand (FILE *file, struct iv_cand *cand)
 854 {
 855   struct iv *iv = cand->iv;
 856
 857   fprintf (file, "Candidate %d:\n", cand->id);
 858   if (cand->inv_vars)
 859     {
 860       fprintf (file, "  Depend on inv.vars: ");
 861       dump_bitmap (file, cand->inv_vars);
 862     }
 863   if (cand->inv_exprs)
 864     {
 865       fprintf (file, "  Depend on inv.exprs: ");
 866       dump_bitmap (file, cand->inv_exprs);
 867     }
 868
 869   if (cand->var_before)
 870     {
 871       fprintf (file, "  Var befor: ");
 872       print_generic_expr (file, cand->var_before, TDF_SLIM);
 873       fprintf (file, "\n");
 874     }
 875   if (cand->var_after)
 876     {
 877       fprintf (file, "  Var after: ");
 878       print_generic_expr (file, cand->var_after, TDF_SLIM);
 879       fprintf (file, "\n");
 880     }
 881
 882   switch (cand->pos)
 883     {
 884     case IP_NORMAL:
 885       fprintf (file, "  Incr POS: before exit test\n");
 886       break;
 887
 888     case IP_BEFORE_USE:
 889       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 890       break;
 891
 892     case IP_AFTER_USE:
 893       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 894       break;
 895
 896     case IP_END:
 897       fprintf (file, "  Incr POS: at end\n");
 898       break;
 899
 900     case IP_ORIGINAL:
 901       fprintf (file, "  Incr POS: orig biv\n");
 902       break;
 903     }
 904
 905   dump_iv (file, iv, false, 1);
 906 }
 907
 908 /* Returns the info for ssa version VER.  */
 909
 910 static inline struct version_info *
 911 ver_info (struct ivopts_data *data, unsigned ver)
 912 {
 913   return data->version_info + ver;
 914 }
 915
 916 /* Returns the info for ssa name NAME.  */
 917
 918 static inline struct version_info *
 919 name_info (struct ivopts_data *data, tree name)
 920 {
 921   return ver_info (data, SSA_NAME_VERSION (name));
 922 }
 923
 924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 925    emitted in LOOP.  */
 926
 927 static bool
 928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
 929 {
 930   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 931
 932   gcc_assert (bb);
 933
 934   if (sbb == loop->latch)
 935     return true;
 936
 937   if (sbb != bb)
 938     return false;
 939
 940   return stmt == last_nondebug_stmt (bb);
 941 }
 942
 943 /* Returns true if STMT if after the place where the original induction
 944    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 945    if the positions are identical.  */
 946
 947 static bool
 948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 949 {
 950   basic_block cand_bb = gimple_bb (cand->incremented_at);
 951   basic_block stmt_bb = gimple_bb (stmt);
 952
 953   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 954     return false;
 955
 956   if (stmt_bb != cand_bb)
 957     return true;
 958
 959   if (true_if_equal
 960       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 961     return true;
 962   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 963 }
 964
 965 /* Returns true if STMT if after the place where the induction variable
 966    CAND is incremented in LOOP.  */
 967
 968 static bool
 969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
 970 {
 971   switch (cand->pos)
 972     {
 973     case IP_END:
 974       return false;
 975
 976     case IP_NORMAL:
 977       return stmt_after_ip_normal_pos (loop, stmt);
 978
 979     case IP_ORIGINAL:
 980     case IP_AFTER_USE:
 981       return stmt_after_inc_pos (cand, stmt, false);
 982
 983     case IP_BEFORE_USE:
 984       return stmt_after_inc_pos (cand, stmt, true);
 985
 986     default:
 987       gcc_unreachable ();
 988     }
 989 }
 990
 991 /* walk_tree callback for contains_abnormal_ssa_name_p.  */
 992
 993 static tree
 994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
 995 {
 996   if (TREE_CODE (*tp) == SSA_NAME
 997       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
 998     return *tp;
 999
1000   if (!EXPR_P (*tp))
1001     *walk_subtrees = 0;
1002
1003   return NULL_TREE;
1004 }
1005
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007    abnormal phi node.  */
1008
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1011 {
1012   return walk_tree_without_duplicates
1013            (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1014 }
1015
1016 /*  Returns the structure describing number of iterations determined from
1017     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1018
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1021 {
1022   class tree_niter_desc *desc;
1023   tree_niter_desc **slot;
1024
1025   if (!data->niters)
1026     {
1027       data->niters = new hash_map<edge, tree_niter_desc *>;
1028       slot = NULL;
1029     }
1030   else
1031     slot = data->niters->get (exit);
1032
1033   if (!slot)
1034     {
1035       /* Try to determine number of iterations.  We cannot safely work with ssa
1036          names that appear in phi nodes on abnormal edges, so that we do not
1037          create overlapping life ranges for them (PR 27283).  */
1038       desc = XNEW (class tree_niter_desc);
1039       ::new (static_cast<void*> (desc)) tree_niter_desc ();
1040       if (!number_of_iterations_exit (data->current_loop,
1041                                       exit, desc, true)
1042           || contains_abnormal_ssa_name_p (desc->niter))
1043         {
1044           desc->~tree_niter_desc ();
1045           XDELETE (desc);
1046           desc = NULL;
1047         }
1048       data->niters->put (exit, desc);
1049     }
1050   else
1051     desc = *slot;
1052
1053   return desc;
1054 }
1055
1056 /* Returns the structure describing number of iterations determined from
1057    single dominating exit of DATA->current_loop, or NULL if something
1058    goes wrong.  */
1059
1060 static class tree_niter_desc *
1061 niter_for_single_dom_exit (struct ivopts_data *data)
1062 {
1063   edge exit = single_dom_exit (data->current_loop);
1064
1065   if (!exit)
1066     return NULL;
1067
1068   return niter_for_exit (data, exit);
1069 }
1070
1071 /* Initializes data structures used by the iv optimization pass, stored
1072    in DATA.  */
1073
1074 static void
1075 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1076 {
1077   data->version_info_size = 2 * num_ssa_names;
1078   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1079   data->relevant = BITMAP_ALLOC (NULL);
1080   data->important_candidates = BITMAP_ALLOC (NULL);
1081   data->max_inv_var_id = 0;
1082   data->max_inv_expr_id = 0;
1083   data->niters = NULL;
1084   data->vgroups.create (20);
1085   data->vcands.create (20);
1086   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1087   data->name_expansion_cache = NULL;
1088   data->base_object_map = NULL;
1089   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1090   data->iv_common_cands.create (20);
1091   decl_rtl_to_reset.create (20);
1092   gcc_obstack_init (&data->iv_obstack);
1093 }
1094
1095 /* walk_tree callback for determine_base_object.  */
1096
1097 static tree
1098 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1099 {
1100   tree_code code = TREE_CODE (*tp);
1101   tree obj = NULL_TREE;
1102   if (code == ADDR_EXPR)
1103     {
1104       tree base = get_base_address (TREE_OPERAND (*tp, 0));
1105       if (!base)
1106         obj = *tp;
1107       else if (TREE_CODE (base) != MEM_REF)
1108         obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1109     }
1110   else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1111         obj = fold_convert (ptr_type_node, *tp);
1112
1113   if (!obj)
1114     {
1115       if (!EXPR_P (*tp))
1116         *walk_subtrees = 0;
1117
1118       return NULL_TREE;
1119     }
1120   /* Record special node for multiple base objects and stop.  */
1121   if (*static_cast<tree *> (wdata))
1122     {
1123       *static_cast<tree *> (wdata) = integer_zero_node;
1124       return integer_zero_node;
1125     }
1126   /* Record the base object and continue looking.  */
1127   *static_cast<tree *> (wdata) = obj;
1128   return NULL_TREE;
1129 }
1130
1131 /* Returns a memory object to that EXPR points with caching.  Return NULL if we
1132    are able to determine that it does not point to any such object; specially
1133    return integer_zero_node if EXPR contains multiple base objects.  */
1134
1135 static tree
1136 determine_base_object (struct ivopts_data *data, tree expr)
1137 {
1138   tree *slot, obj = NULL_TREE;
1139   if (data->base_object_map)
1140     {
1141       if ((slot = data->base_object_map->get(expr)) != NULL)
1142         return *slot;
1143     }
1144   else
1145     data->base_object_map = new hash_map<tree, tree>;
1146
1147   (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1148   data->base_object_map->put (expr, obj);
1149   return obj;
1150 }
1151
1152 /* Return true if address expression with non-DECL_P operand appears
1153    in EXPR.  */
1154
1155 static bool
1156 contain_complex_addr_expr (tree expr)
1157 {
1158   bool res = false;
1159
1160   STRIP_NOPS (expr);
1161   switch (TREE_CODE (expr))
1162     {
1163     case POINTER_PLUS_EXPR:
1164     case PLUS_EXPR:
1165     case MINUS_EXPR:
1166       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1167       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1168       break;
1169
1170     case ADDR_EXPR:
1171       return (!DECL_P (TREE_OPERAND (expr, 0)));
1172
1173     default:
1174       return false;
1175     }
1176
1177   return res;
1178 }
1179
1180 /* Allocates an induction variable with given initial value BASE and step STEP
1181    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1182
1183 static struct iv *
1184 alloc_iv (struct ivopts_data *data, tree base, tree step,
1185           bool no_overflow = false)
1186 {
1187   tree expr = base;
1188   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1189                                               sizeof (struct iv));
1190   gcc_assert (step != NULL_TREE);
1191
1192   /* Lower address expression in base except ones with DECL_P as operand.
1193      By doing this:
1194        1) More accurate cost can be computed for address expressions;
1195        2) Duplicate candidates won't be created for bases in different
1196           forms, like &a[0] and &a.  */
1197   STRIP_NOPS (expr);
1198   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1199       || contain_complex_addr_expr (expr))
1200     {
1201       aff_tree comb;
1202       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1203       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204     }
1205
1206   iv->base = base;
1207   iv->base_object = determine_base_object (data, base);
1208   iv->step = step;
1209   iv->biv_p = false;
1210   iv->nonlin_use = NULL;
1211   iv->ssa_name = NULL_TREE;
1212   if (!no_overflow
1213        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1214                               base, step))
1215     no_overflow = true;
1216   iv->no_overflow = no_overflow;
1217   iv->have_address_use = false;
1218
1219   return iv;
1220 }
1221
1222 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1223    doesn't overflow.  */
1224
1225 static void
1226 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1227         bool no_overflow)
1228 {
1229   struct version_info *info = name_info (data, iv);
1230
1231   gcc_assert (!info->iv);
1232
1233   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1234   info->iv = alloc_iv (data, base, step, no_overflow);
1235   info->iv->ssa_name = iv;
1236 }
1237
1238 /* Finds induction variable declaration for VAR.  */
1239
1240 static struct iv *
1241 get_iv (struct ivopts_data *data, tree var)
1242 {
1243   basic_block bb;
1244   tree type = TREE_TYPE (var);
1245
1246   if (!POINTER_TYPE_P (type)
1247       && !INTEGRAL_TYPE_P (type))
1248     return NULL;
1249
1250   if (!name_info (data, var)->iv)
1251     {
1252       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1253
1254       if (!bb
1255           || !flow_bb_inside_loop_p (data->current_loop, bb))
1256         {
1257           if (POINTER_TYPE_P (type))
1258             type = sizetype;
1259           set_iv (data, var, var, build_int_cst (type, 0), true);
1260         }
1261     }
1262
1263   return name_info (data, var)->iv;
1264 }
1265
1266 /* Return the first non-invariant ssa var found in EXPR.  */
1267
1268 static tree
1269 extract_single_var_from_expr (tree expr)
1270 {
1271   int i, n;
1272   tree tmp;
1273   enum tree_code code;
1274
1275   if (!expr || is_gimple_min_invariant (expr))
1276     return NULL;
1277
1278   code = TREE_CODE (expr);
1279   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1280     {
1281       n = TREE_OPERAND_LENGTH (expr);
1282       for (i = 0; i < n; i++)
1283         {
1284           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1285
1286           if (tmp)
1287             return tmp;
1288         }
1289     }
1290   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291 }
1292
1293 /* Finds basic ivs.  */
1294
1295 static bool
1296 find_bivs (struct ivopts_data *data)
1297 {
1298   gphi *phi;
1299   affine_iv iv;
1300   tree step, type, base, stop;
1301   bool found = false;
1302   class loop *loop = data->current_loop;
1303   gphi_iterator psi;
1304
1305   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1306     {
1307       phi = psi.phi ();
1308
1309       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1310         continue;
1311
1312       if (virtual_operand_p (PHI_RESULT (phi)))
1313         continue;
1314
1315       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1316         continue;
1317
1318       if (integer_zerop (iv.step))
1319         continue;
1320
1321       step = iv.step;
1322       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1323       /* Stop expanding iv base at the first ssa var referred by iv step.
1324          Ideally we should stop at any ssa var, because that's expensive
1325          and unusual to happen, we just do it on the first one.
1326
1327          See PR64705 for the rationale.  */
1328       stop = extract_single_var_from_expr (step);
1329       base = expand_simple_operations (base, stop);
1330       if (contains_abnormal_ssa_name_p (base)
1331           || contains_abnormal_ssa_name_p (step))
1332         continue;
1333
1334       type = TREE_TYPE (PHI_RESULT (phi));
1335       base = fold_convert (type, base);
1336       if (step)
1337         {
1338           if (POINTER_TYPE_P (type))
1339             step = convert_to_ptrofftype (step);
1340           else
1341             step = fold_convert (type, step);
1342         }
1343
1344       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1345       found = true;
1346     }
1347
1348   return found;
1349 }
1350
1351 /* Marks basic ivs.  */
1352
1353 static void
1354 mark_bivs (struct ivopts_data *data)
1355 {
1356   gphi *phi;
1357   gimple *def;
1358   tree var;
1359   struct iv *iv, *incr_iv;
1360   class loop *loop = data->current_loop;
1361   basic_block incr_bb;
1362   gphi_iterator psi;
1363
1364   data->bivs_not_used_in_addr = 0;
1365   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1366     {
1367       phi = psi.phi ();
1368
1369       iv = get_iv (data, PHI_RESULT (phi));
1370       if (!iv)
1371         continue;
1372
1373       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1374       def = SSA_NAME_DEF_STMT (var);
1375       /* Don't mark iv peeled from other one as biv.  */
1376       if (def
1377           && gimple_code (def) == GIMPLE_PHI
1378           && gimple_bb (def) == loop->header)
1379         continue;
1380
1381       incr_iv = get_iv (data, var);
1382       if (!incr_iv)
1383         continue;
1384
1385       /* If the increment is in the subloop, ignore it.  */
1386       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1387       if (incr_bb->loop_father != data->current_loop
1388           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1389         continue;
1390
1391       iv->biv_p = true;
1392       incr_iv->biv_p = true;
1393       if (iv->no_overflow)
1394         data->bivs_not_used_in_addr++;
1395       if (incr_iv->no_overflow)
1396         data->bivs_not_used_in_addr++;
1397     }
1398 }
1399
1400 /* Checks whether STMT defines a linear induction variable and stores its
1401    parameters to IV.  */
1402
1403 static bool
1404 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1405 {
1406   tree lhs, stop;
1407   class loop *loop = data->current_loop;
1408
1409   iv->base = NULL_TREE;
1410   iv->step = NULL_TREE;
1411
1412   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1413     return false;
1414
1415   lhs = gimple_assign_lhs (stmt);
1416   if (TREE_CODE (lhs) != SSA_NAME)
1417     return false;
1418
1419   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1420     return false;
1421
1422   /* Stop expanding iv base at the first ssa var referred by iv step.
1423      Ideally we should stop at any ssa var, because that's expensive
1424      and unusual to happen, we just do it on the first one.
1425
1426      See PR64705 for the rationale.  */
1427   stop = extract_single_var_from_expr (iv->step);
1428   iv->base = expand_simple_operations (iv->base, stop);
1429   if (contains_abnormal_ssa_name_p (iv->base)
1430       || contains_abnormal_ssa_name_p (iv->step))
1431     return false;
1432
1433   /* If STMT could throw, then do not consider STMT as defining a GIV.
1434      While this will suppress optimizations, we cannot safely delete this
1435      GIV and associated statements, even if it appears it is not used.  */
1436   if (stmt_could_throw_p (cfun, stmt))
1437     return false;
1438
1439   return true;
1440 }
1441
1442 /* Finds general ivs in statement STMT.  */
1443
1444 static void
1445 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1446 {
1447   affine_iv iv;
1448
1449   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1450     return;
1451
1452   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1453 }
1454
1455 /* Finds general ivs in basic block BB.  */
1456
1457 static void
1458 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1459 {
1460   gimple_stmt_iterator bsi;
1461
1462   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1463     find_givs_in_stmt (data, gsi_stmt (bsi));
1464 }
1465
1466 /* Finds general ivs.  */
1467
1468 static void
1469 find_givs (struct ivopts_data *data, basic_block *body)
1470 {
1471   class loop *loop = data->current_loop;
1472   unsigned i;
1473
1474   for (i = 0; i < loop->num_nodes; i++)
1475     find_givs_in_bb (data, body[i]);
1476 }
1477
1478 /* For each ssa name defined in LOOP determines whether it is an induction
1479    variable and if so, its initial value and step.  */
1480
1481 static bool
1482 find_induction_variables (struct ivopts_data *data, basic_block *body)
1483 {
1484   unsigned i;
1485   bitmap_iterator bi;
1486
1487   if (!find_bivs (data))
1488     return false;
1489
1490   find_givs (data, body);
1491   mark_bivs (data);
1492
1493   if (dump_file && (dump_flags & TDF_DETAILS))
1494     {
1495       class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1496
1497       if (niter)
1498         {
1499           fprintf (dump_file, "  number of iterations ");
1500           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1501           if (!integer_zerop (niter->may_be_zero))
1502             {
1503               fprintf (dump_file, "; zero if ");
1504               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1505             }
1506           fprintf (dump_file, "\n");
1507         };
1508
1509       fprintf (dump_file, "\n<Induction Vars>:\n");
1510       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1511         {
1512           struct version_info *info = ver_info (data, i);
1513           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1514             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1515         }
1516     }
1517
1518   return true;
1519 }
1520
1521 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1522    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1523    is the const offset stripped from IV base and MEM_TYPE is the type
1524    of the memory being addressed.  For uses of other types, ADDR_BASE
1525    and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */
1526
1527 static struct iv_use *
1528 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1529             gimple *stmt, enum use_type type, tree mem_type,
1530             tree addr_base, poly_uint64 addr_offset)
1531 {
1532   struct iv_use *use = XCNEW (struct iv_use);
1533
1534   use->id = group->vuses.length ();
1535   use->group_id = group->id;
1536   use->type = type;
1537   use->mem_type = mem_type;
1538   use->iv = iv;
1539   use->stmt = stmt;
1540   use->op_p = use_p;
1541   use->addr_base = addr_base;
1542   use->addr_offset = addr_offset;
1543
1544   group->vuses.safe_push (use);
1545   return use;
1546 }
1547
1548 /* Checks whether OP is a loop-level invariant and if so, records it.
1549    NONLINEAR_USE is true if the invariant is used in a way we do not
1550    handle specially.  */
1551
1552 static void
1553 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1554 {
1555   basic_block bb;
1556   struct version_info *info;
1557
1558   if (TREE_CODE (op) != SSA_NAME
1559       || virtual_operand_p (op))
1560     return;
1561
1562   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1563   if (bb
1564       && flow_bb_inside_loop_p (data->current_loop, bb))
1565     return;
1566
1567   info = name_info (data, op);
1568   info->name = op;
1569   info->has_nonlin_use |= nonlinear_use;
1570   if (!info->inv_id)
1571     info->inv_id = ++data->max_inv_var_id;
1572   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573 }
1574
1575 /* Record a group of TYPE.  */
1576
1577 static struct iv_group *
1578 record_group (struct ivopts_data *data, enum use_type type)
1579 {
1580   struct iv_group *group = XCNEW (struct iv_group);
1581
1582   group->id = data->vgroups.length ();
1583   group->type = type;
1584   group->related_cands = BITMAP_ALLOC (NULL);
1585   group->vuses.create (1);
1586   group->doloop_p = false;
1587
1588   data->vgroups.safe_push (group);
1589   return group;
1590 }
1591
1592 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1593    New group will be created if there is no existing group for the use.
1594    MEM_TYPE is the type of memory being addressed, or NULL if this
1595    isn't an address reference.  */
1596
1597 static struct iv_use *
1598 record_group_use (struct ivopts_data *data, tree *use_p,
1599                   struct iv *iv, gimple *stmt, enum use_type type,
1600                   tree mem_type)
1601 {
1602   tree addr_base = NULL;
1603   struct iv_group *group = NULL;
1604   poly_uint64 addr_offset = 0;
1605
1606   /* Record non address type use in a new group.  */
1607   if (address_p (type))
1608     {
1609       unsigned int i;
1610
1611       gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1612       tree addr_toffset;
1613       split_constant_offset (iv->base, &addr_base, &addr_toffset);
1614       addr_offset = int_cst_value (addr_toffset);
1615       for (i = 0; i < data->vgroups.length (); i++)
1616         {
1617           struct iv_use *use;
1618
1619           group = data->vgroups[i];
1620           use = group->vuses[0];
1621           if (!address_p (use->type))
1622             continue;
1623
1624           /* Check if it has the same stripped base and step.  */
1625           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1626               && operand_equal_p (iv->step, use->iv->step, 0)
1627               && operand_equal_p (addr_base, use->addr_base, 0))
1628             break;
1629         }
1630       if (i == data->vgroups.length ())
1631         group = NULL;
1632     }
1633
1634   if (!group)
1635     group = record_group (data, type);
1636
1637   return record_use (group, use_p, iv, stmt, type, mem_type,
1638                      addr_base, addr_offset);
1639 }
1640
1641 /* Checks whether the use OP is interesting and if so, records it.  */
1642
1643 static struct iv_use *
1644 find_interesting_uses_op (struct ivopts_data *data, tree op)
1645 {
1646   struct iv *iv;
1647   gimple *stmt;
1648   struct iv_use *use;
1649
1650   if (TREE_CODE (op) != SSA_NAME)
1651     return NULL;
1652
1653   iv = get_iv (data, op);
1654   if (!iv)
1655     return NULL;
1656
1657   if (iv->nonlin_use)
1658     {
1659       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1660       return iv->nonlin_use;
1661     }
1662
1663   if (integer_zerop (iv->step))
1664     {
1665       record_invariant (data, op, true);
1666       return NULL;
1667     }
1668
1669   stmt = SSA_NAME_DEF_STMT (op);
1670   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1671
1672   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1673   iv->nonlin_use = use;
1674   return use;
1675 }
1676
/* Indicate how compare type iv_use can be handled.  This is the result
   type of extract_cond_operands.  */
enum comp_iv_rewrite
{
  /* The comparison is not treated as a compare type iv_use; the initial
     value of the classification in extract_cond_operands.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1690
/* Given a condition in statement STMT, checks whether it is a compare
   involving an induction variable and classifies how it can be
   rewritten:

     COMP_IV_EXPR_2 -- both operands are ivs with nonzero step;
     COMP_IV_ELIM   -- one operand is an iv with nonzero step and the
                       other has an iv description with zero step (this
                       includes non-ssa-name operands, which are
                       described by a static zero-step iv);
     COMP_IV_EXPR   -- one operand is an iv with nonzero step and the
                       other has no iv description at all;
     COMP_IV_NA     -- neither operand is an iv with nonzero step.

   Each of CONTROL_VAR, BOUND, IV_VAR and IV_BOUND may be NULL; those that
   are not are set on every path.  CONTROL_VAR and BOUND receive the
   locations of the two operands and IV_VAR and IV_BOUND the
   corresponding iv descriptions.  When exactly one operand is an iv with
   nonzero step, the operands are swapped if necessary so that the iv is
   reported through CONTROL_VAR/IV_VAR; otherwise they are reported in
   statement order.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      /* NOTE(review): any statement that is not a GIMPLE_COND is accessed
         as an assignment here; presumably callers only pass comparison
         assignments -- confirm against the callers.  */
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  /* (Re)initialize the static fallback objects on every call.  */
  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  /* Operands that are not ssa names keep the zero-step CONST_IV.  For ssa
     names get_iv may return NULL.  */
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of comparison are IVs.  We can express ivs on both end.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If none side of comparison is IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  /* Hand back only the results the caller asked for.  */
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
1768
1769 /* Checks whether the condition in STMT is interesting and if so,
1770    records it.  */
1771
1772 static void
1773 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1774 {
1775   tree *var_p, *bound_p;
1776   struct iv *var_iv, *bound_iv;
1777   enum comp_iv_rewrite ret;
1778
1779   ret = extract_cond_operands (data, stmt,
1780                                &var_p, &bound_p, &var_iv, &bound_iv);
1781   if (ret == COMP_IV_NA)
1782     {
1783       find_interesting_uses_op (data, *var_p);
1784       find_interesting_uses_op (data, *bound_p);
1785       return;
1786     }
1787
1788   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1789   /* Record compare type iv_use for iv on the other side of comparison.  */
1790   if (ret == COMP_IV_EXPR_2)
1791     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1792 }
1793
1794 /* Returns the outermost loop EXPR is obviously invariant in
1795    relative to the loop LOOP, i.e. if all its operands are defined
1796    outside of the returned loop.  Returns NULL if EXPR is not
1797    even obviously invariant in LOOP.  */
1798
1799 class loop *
1800 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1801 {
1802   basic_block def_bb;
1803   unsigned i, len;
1804
1805   if (is_gimple_min_invariant (expr))
1806     return current_loops->tree_root;
1807
1808   if (TREE_CODE (expr) == SSA_NAME)
1809     {
1810       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1811       if (def_bb)
1812         {
1813           if (flow_bb_inside_loop_p (loop, def_bb))
1814             return NULL;
1815           return superloop_at_depth (loop,
1816                                      loop_depth (def_bb->loop_father) + 1);
1817         }
1818
1819       return current_loops->tree_root;
1820     }
1821
1822   if (!EXPR_P (expr))
1823     return NULL;
1824
1825   unsigned maxdepth = 0;
1826   len = TREE_OPERAND_LENGTH (expr);
1827   for (i = 0; i < len; i++)
1828     {
1829       class loop *ivloop;
1830       if (!TREE_OPERAND (expr, i))
1831         continue;
1832
1833       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1834       if (!ivloop)
1835         return NULL;
1836       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837     }
1838
1839   return superloop_at_depth (loop, maxdepth);
1840 }
1841
1842 /* Returns true if expression EXPR is obviously invariant in LOOP,
1843    i.e. if all its operands are defined outside of the LOOP.  LOOP
1844    should not be the function body.  */
1845
1846 bool
1847 expr_invariant_in_loop_p (class loop *loop, tree expr)
1848 {
1849   basic_block def_bb;
1850   unsigned i, len;
1851
1852   gcc_assert (loop_depth (loop) > 0);
1853
1854   if (is_gimple_min_invariant (expr))
1855     return true;
1856
1857   if (TREE_CODE (expr) == SSA_NAME)
1858     {
1859       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1860       if (def_bb
1861           && flow_bb_inside_loop_p (loop, def_bb))
1862         return false;
1863
1864       return true;
1865     }
1866
1867   if (!EXPR_P (expr))
1868     return false;
1869
1870   len = TREE_OPERAND_LENGTH (expr);
1871   for (i = 0; i < len; i++)
1872     if (TREE_OPERAND (expr, i)
1873         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1874       return false;
1875
1876   return true;
1877 }
1878
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.
   Returns NULL if EXPR is NULL_TREE or a gimple minimal invariant, or if
   no biv is reached.  The first biv found wins; operands are tried in
   order.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  /* For an expression, look for a biv in each of its operands.  */
  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
          if (iv)
            return iv;
        }
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  /* A ssa name without an iv description or with zero step cannot lead
     to a biv; a name that is itself a biv is the answer.  */
  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  /* Otherwise follow the statement defining EXPR.  */
  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  Note that the test
         applies to any PHI whose block belongs directly to the current
         loop.  */
      if (phi_bb->loop_father == data->current_loop)
        return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
        return NULL;

      /* Trace each PHI argument in turn.  */
      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
        {
          tree use = USE_FROM_PTR (use_p);
          iv = find_deriving_biv_for_expr (data, use);
          if (iv)
            return iv;
        }
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  /* A single-operand right-hand side is traced through directly.  */
  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
         are simple.  The second operand is tried first; if it yields
         nothing we fall through and try the first operand.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
        return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
1975
1976 /* Record BIV, its predecessor and successor that they are used in
1977    address type uses.  */
1978
1979 static void
1980 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1981 {
1982   unsigned i;
1983   tree type, base_1, base_2;
1984   bitmap_iterator bi;
1985
1986   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1987       || biv->have_address_use || !biv->no_overflow)
1988     return;
1989
1990   type = TREE_TYPE (biv->base);
1991   if (!INTEGRAL_TYPE_P (type))
1992     return;
1993
1994   biv->have_address_use = true;
1995   data->bivs_not_used_in_addr--;
1996   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1997   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1998     {
1999       struct iv *iv = ver_info (data, i)->iv;
2000
2001       if (!iv || !iv->biv_p || integer_zerop (iv->step)
2002           || iv->have_address_use || !iv->no_overflow)
2003         continue;
2004
2005       if (type != TREE_TYPE (iv->base)
2006           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2007         continue;
2008
2009       if (!operand_equal_p (biv->step, iv->step, 0))
2010         continue;
2011
2012       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2013       if (operand_equal_p (base_1, iv->base, 0)
2014           || operand_equal_p (base_2, biv->base, 0))
2015         {
2016           iv->have_address_use = true;
2017           data->bivs_not_used_in_addr--;
2018         }
2019     }
2020 }
2021
2022 /* Cumulates the steps of indices into DATA and replaces their values with the
2023    initial ones.  Returns false when the value of the index cannot be determined.
2024    Callback for for_each_index.  */
2025
2026 struct ifs_ivopts_data
2027 {
2028   struct ivopts_data *ivopts_data;
2029   gimple *stmt;
2030   tree step;
2031 };
2032
2033 static bool
2034 idx_find_step (tree base, tree *idx, void *data)
2035 {
2036   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2037   struct iv *iv;
2038   bool use_overflow_semantics = false;
2039   tree step, iv_base, iv_step, lbound, off;
2040   class loop *loop = dta->ivopts_data->current_loop;
2041
2042   /* If base is a component ref, require that the offset of the reference
2043      be invariant.  */
2044   if (TREE_CODE (base) == COMPONENT_REF)
2045     {
2046       off = component_ref_field_offset (base);
2047       return expr_invariant_in_loop_p (loop, off);
2048     }
2049
2050   /* If base is array, first check whether we will be able to move the
2051      reference out of the loop (in order to take its address in strength
2052      reduction).  In order for this to work we need both lower bound
2053      and step to be loop invariants.  */
2054   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2055     {
2056       /* Moreover, for a range, the size needs to be invariant as well.  */
2057       if (TREE_CODE (base) == ARRAY_RANGE_REF
2058           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2059         return false;
2060
2061       step = array_ref_element_size (base);
2062       lbound = array_ref_low_bound (base);
2063
2064       if (!expr_invariant_in_loop_p (loop, step)
2065           || !expr_invariant_in_loop_p (loop, lbound))
2066         return false;
2067     }
2068
2069   if (TREE_CODE (*idx) != SSA_NAME)
2070     return true;
2071
2072   iv = get_iv (dta->ivopts_data, *idx);
2073   if (!iv)
2074     return false;
2075
2076   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2077           *&x[0], which is not folded and does not trigger the
2078           ARRAY_REF path below.  */
2079   *idx = iv->base;
2080
2081   if (integer_zerop (iv->step))
2082     return true;
2083
2084   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2085     {
2086       step = array_ref_element_size (base);
2087
2088       /* We only handle addresses whose step is an integer constant.  */
2089       if (TREE_CODE (step) != INTEGER_CST)
2090         return false;
2091     }
2092   else
2093     /* The step for pointer arithmetics already is 1 byte.  */
2094     step = size_one_node;
2095
2096   iv_base = iv->base;
2097   iv_step = iv->step;
2098   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2099     use_overflow_semantics = true;
2100
2101   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2102                             sizetype, &iv_base, &iv_step, dta->stmt,
2103                             use_overflow_semantics))
2104     {
2105       /* The index might wrap.  */
2106       return false;
2107     }
2108
2109   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2110   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2111
2112   if (dta->ivopts_data->bivs_not_used_in_addr)
2113     {
2114       if (!iv->biv_p)
2115         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2116
2117       record_biv_for_address_use (dta->ivopts_data, iv);
2118     }
2119   return true;
2120 }
2121
2122 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2123    object is passed to it in DATA.  */
2124
2125 static bool
2126 idx_record_use (tree base, tree *idx,
2127                 void *vdata)
2128 {
2129   struct ivopts_data *data = (struct ivopts_data *) vdata;
2130   find_interesting_uses_op (data, *idx);
2131   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2132     {
2133       if (TREE_OPERAND (base, 2))
2134         find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2135       if (TREE_OPERAND (base, 3))
2136         find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2137     }
2138   return true;
2139 }
2140
2141 /* If we can prove that TOP = cst * BOT for some constant cst,
2142    store cst to MUL and return true.  Otherwise return false.
2143    The returned value is always sign-extended, regardless of the
2144    signedness of TOP and BOT.  */
2145
2146 static bool
2147 constant_multiple_of (tree top, tree bot, widest_int *mul)
2148 {
2149   tree mby;
2150   enum tree_code code;
2151   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2152   widest_int res, p0, p1;
2153
2154   STRIP_NOPS (top);
2155   STRIP_NOPS (bot);
2156
2157   if (operand_equal_p (top, bot, 0))
2158     {
2159       *mul = 1;
2160       return true;
2161     }
2162
2163   code = TREE_CODE (top);
2164   switch (code)
2165     {
2166     case MULT_EXPR:
2167       mby = TREE_OPERAND (top, 1);
2168       if (TREE_CODE (mby) != INTEGER_CST)
2169         return false;
2170
2171       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2172         return false;
2173
2174       *mul = wi::sext (res * wi::to_widest (mby), precision);
2175       return true;
2176
2177     case PLUS_EXPR:
2178     case MINUS_EXPR:
2179       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2180           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2181         return false;
2182
2183       if (code == MINUS_EXPR)
2184         p1 = -p1;
2185       *mul = wi::sext (p0 + p1, precision);
2186       return true;
2187
2188     case INTEGER_CST:
2189       if (TREE_CODE (bot) != INTEGER_CST)
2190         return false;
2191
2192       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2193       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2194       if (p1 == 0)
2195         return false;
2196       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2197       return res == 0;
2198
2199     default:
2200       if (POLY_INT_CST_P (top)
2201           && POLY_INT_CST_P (bot)
2202           && constant_multiple_p (wi::to_poly_widest (top),
2203                                   wi::to_poly_widest (bot), mul))
2204         return true;
2205
2206       return false;
2207     }
2208 }
2209
2210 /* Return true if memory reference REF with step STEP may be unaligned.  */
2211
2212 static bool
2213 may_be_unaligned_p (tree ref, tree step)
2214 {
2215   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2216      thus they are not misaligned.  */
2217   if (TREE_CODE (ref) == TARGET_MEM_REF)
2218     return false;
2219
2220   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2221   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2222     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2223
2224   unsigned HOST_WIDE_INT bitpos;
2225   unsigned int ref_align;
2226   get_object_alignment_1 (ref, &ref_align, &bitpos);
2227   if (ref_align < align
2228       || (bitpos % align) != 0
2229       || (bitpos % BITS_PER_UNIT) != 0)
2230     return true;
2231
2232   unsigned int trailing_zeros = tree_ctz (step);
2233   if (trailing_zeros < HOST_BITS_PER_INT
2234       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2235     return true;
2236
2237   return false;
2238 }
2239
2240 /* Return true if EXPR may be non-addressable.   */
2241
2242 bool
2243 may_be_nonaddressable_p (tree expr)
2244 {
2245   switch (TREE_CODE (expr))
2246     {
2247     case VAR_DECL:
2248       /* Check if it's a register variable.  */
2249       return DECL_HARD_REGISTER (expr);
2250
2251     case TARGET_MEM_REF:
2252       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2253          target, thus they are always addressable.  */
2254       return false;
2255
2256     case MEM_REF:
2257       /* Likewise for MEM_REFs, modulo the storage order.  */
2258       return REF_REVERSE_STORAGE_ORDER (expr);
2259
2260     case BIT_FIELD_REF:
2261       if (REF_REVERSE_STORAGE_ORDER (expr))
2262         return true;
2263       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2264
2265     case COMPONENT_REF:
2266       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2267         return true;
2268       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2269              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2270
2271     case ARRAY_REF:
2272     case ARRAY_RANGE_REF:
2273       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2274         return true;
2275       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2276
2277     case VIEW_CONVERT_EXPR:
2278       /* This kind of view-conversions may wrap non-addressable objects
2279          and make them look addressable.  After some processing the
2280          non-addressability may be uncovered again, causing ADDR_EXPRs
2281          of inappropriate objects to be built.  */
2282       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2283           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2284         return true;
2285       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2286
2287     CASE_CONVERT:
2288       return true;
2289
2290     default:
2291       break;
2292     }
2293
2294   return false;
2295 }
2296
2297 /* Finds addresses in *OP_P inside STMT.  */
2298
2299 static void
2300 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2301                                tree *op_p)
2302 {
2303   tree base = *op_p, step = size_zero_node;
2304   struct iv *civ;
2305   struct ifs_ivopts_data ifs_ivopts_data;
2306
2307   /* Do not play with volatile memory references.  A bit too conservative,
2308      perhaps, but safe.  */
2309   if (gimple_has_volatile_ops (stmt))
2310     goto fail;
2311
2312   /* Ignore bitfields for now.  Not really something terribly complicated
2313      to handle.  TODO.  */
2314   if (TREE_CODE (base) == BIT_FIELD_REF)
2315     goto fail;
2316
2317   base = unshare_expr (base);
2318
2319   if (TREE_CODE (base) == TARGET_MEM_REF)
2320     {
2321       tree type = build_pointer_type (TREE_TYPE (base));
2322       tree astep;
2323
2324       if (TMR_BASE (base)
2325           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2326         {
2327           civ = get_iv (data, TMR_BASE (base));
2328           if (!civ)
2329             goto fail;
2330
2331           TMR_BASE (base) = civ->base;
2332           step = civ->step;
2333         }
2334       if (TMR_INDEX2 (base)
2335           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2336         {
2337           civ = get_iv (data, TMR_INDEX2 (base));
2338           if (!civ)
2339             goto fail;
2340
2341           TMR_INDEX2 (base) = civ->base;
2342           step = civ->step;
2343         }
2344       if (TMR_INDEX (base)
2345           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2346         {
2347           civ = get_iv (data, TMR_INDEX (base));
2348           if (!civ)
2349             goto fail;
2350
2351           TMR_INDEX (base) = civ->base;
2352           astep = civ->step;
2353
2354           if (astep)
2355             {
2356               if (TMR_STEP (base))
2357                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2358
2359               step = fold_build2 (PLUS_EXPR, type, step, astep);
2360             }
2361         }
2362
2363       if (integer_zerop (step))
2364         goto fail;
2365       base = tree_mem_ref_addr (type, base);
2366     }
2367   else
2368     {
2369       ifs_ivopts_data.ivopts_data = data;
2370       ifs_ivopts_data.stmt = stmt;
2371       ifs_ivopts_data.step = size_zero_node;
2372       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2373           || integer_zerop (ifs_ivopts_data.step))
2374         goto fail;
2375       step = ifs_ivopts_data.step;
2376
2377       /* Check that the base expression is addressable.  This needs
2378          to be done after substituting bases of IVs into it.  */
2379       if (may_be_nonaddressable_p (base))
2380         goto fail;
2381
2382       /* Moreover, on strict alignment platforms, check that it is
2383          sufficiently aligned.  */
2384       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2385         goto fail;
2386
2387       base = build_fold_addr_expr (base);
2388
2389       /* Substituting bases of IVs into the base expression might
2390          have caused folding opportunities.  */
2391       if (TREE_CODE (base) == ADDR_EXPR)
2392         {
2393           tree *ref = &TREE_OPERAND (base, 0);
2394           while (handled_component_p (*ref))
2395             ref = &TREE_OPERAND (*ref, 0);
2396           if (TREE_CODE (*ref) == MEM_REF)
2397             {
2398               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2399                                       TREE_OPERAND (*ref, 0),
2400                                       TREE_OPERAND (*ref, 1));
2401               if (tem)
2402                 *ref = tem;
2403             }
2404         }
2405     }
2406
2407   civ = alloc_iv (data, base, step);
2408   /* Fail if base object of this memory reference is unknown.  */
2409   if (civ->base_object == NULL_TREE)
2410     goto fail;
2411
2412   record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2413   return;
2414
2415 fail:
2416   for_each_index (op_p, idx_record_use, data);
2417 }
2418
2419 /* Finds and records invariants used in STMT.  */
2420
2421 static void
2422 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2423 {
2424   ssa_op_iter iter;
2425   use_operand_p use_p;
2426   tree op;
2427
2428   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2429     {
2430       op = USE_FROM_PTR (use_p);
2431       record_invariant (data, op, false);
2432     }
2433 }
2434
2435 /* CALL calls an internal function.  If operand *OP_P will become an
2436    address when the call is expanded, return the type of the memory
2437    being addressed, otherwise return null.  */
2438
2439 static tree
2440 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2441 {
2442   switch (gimple_call_internal_fn (call))
2443     {
2444     case IFN_MASK_LOAD:
2445     case IFN_MASK_LOAD_LANES:
2446     case IFN_MASK_LEN_LOAD_LANES:
2447     case IFN_LEN_LOAD:
2448     case IFN_MASK_LEN_LOAD:
2449       if (op_p == gimple_call_arg_ptr (call, 0))
2450         return TREE_TYPE (gimple_call_lhs (call));
2451       return NULL_TREE;
2452
2453     case IFN_MASK_STORE:
2454     case IFN_MASK_STORE_LANES:
2455     case IFN_MASK_LEN_STORE_LANES:
2456     case IFN_LEN_STORE:
2457     case IFN_MASK_LEN_STORE:
2458       {
2459         if (op_p == gimple_call_arg_ptr (call, 0))
2460           {
2461             internal_fn ifn = gimple_call_internal_fn (call);
2462             int index = internal_fn_stored_value_index (ifn);
2463             return TREE_TYPE (gimple_call_arg (call, index));
2464           }
2465         return NULL_TREE;
2466       }
2467
2468     default:
2469       return NULL_TREE;
2470     }
2471 }
2472
2473 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2474    Return true if the operand will become an address when STMT
2475    is expanded and record the associated address use if so.  */
2476
2477 static bool
2478 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2479                        struct iv *iv)
2480 {
2481   /* Fail if base object of this memory reference is unknown.  */
2482   if (iv->base_object == NULL_TREE)
2483     return false;
2484
2485   tree mem_type = NULL_TREE;
2486   if (gcall *call = dyn_cast <gcall *> (stmt))
2487     if (gimple_call_internal_p (call))
2488       mem_type = get_mem_type_for_internal_fn (call, op_p);
2489   if (mem_type)
2490     {
2491       iv = alloc_iv (data, iv->base, iv->step);
2492       record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2493       return true;
2494     }
2495   return false;
2496 }
2497
2498 /* Finds interesting uses of induction variables in the statement STMT.  */
2499
2500 static void
2501 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2502 {
2503   struct iv *iv;
2504   tree op, *lhs, *rhs;
2505   ssa_op_iter iter;
2506   use_operand_p use_p;
2507   enum tree_code code;
2508
2509   find_invariants_stmt (data, stmt);
2510
2511   if (gimple_code (stmt) == GIMPLE_COND)
2512     {
2513       find_interesting_uses_cond (data, stmt);
2514       return;
2515     }
2516
2517   if (is_gimple_assign (stmt))
2518     {
2519       lhs = gimple_assign_lhs_ptr (stmt);
2520       rhs = gimple_assign_rhs1_ptr (stmt);
2521
2522       if (TREE_CODE (*lhs) == SSA_NAME)
2523         {
2524           /* If the statement defines an induction variable, the uses are not
2525              interesting by themselves.  */
2526
2527           iv = get_iv (data, *lhs);
2528
2529           if (iv && !integer_zerop (iv->step))
2530             return;
2531         }
2532
2533       code = gimple_assign_rhs_code (stmt);
2534       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2535           && (REFERENCE_CLASS_P (*rhs)
2536               || is_gimple_val (*rhs)))
2537         {
2538           if (REFERENCE_CLASS_P (*rhs))
2539             find_interesting_uses_address (data, stmt, rhs);
2540           else
2541             find_interesting_uses_op (data, *rhs);
2542
2543           if (REFERENCE_CLASS_P (*lhs))
2544             find_interesting_uses_address (data, stmt, lhs);
2545           return;
2546         }
2547       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2548         {
2549           find_interesting_uses_cond (data, stmt);
2550           return;
2551         }
2552
2553       /* TODO -- we should also handle address uses of type
2554
2555          memory = call (whatever);
2556
2557          and
2558
2559          call (memory).  */
2560     }
2561
2562   if (gimple_code (stmt) == GIMPLE_PHI
2563       && gimple_bb (stmt) == data->current_loop->header)
2564     {
2565       iv = get_iv (data, PHI_RESULT (stmt));
2566
2567       if (iv && !integer_zerop (iv->step))
2568         return;
2569     }
2570
2571   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2572     {
2573       op = USE_FROM_PTR (use_p);
2574
2575       if (TREE_CODE (op) != SSA_NAME)
2576         continue;
2577
2578       iv = get_iv (data, op);
2579       if (!iv)
2580         continue;
2581
2582       if (!find_address_like_use (data, stmt, use_p->use, iv))
2583         find_interesting_uses_op (data, op);
2584     }
2585 }
2586
2587 /* Finds interesting uses of induction variables outside of loops
2588    on loop exit edge EXIT.  */
2589
2590 static void
2591 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2592 {
2593   gphi *phi;
2594   gphi_iterator psi;
2595   tree def;
2596
2597   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2598     {
2599       phi = psi.phi ();
2600       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2601       if (!virtual_operand_p (def))
2602         find_interesting_uses_op (data, def);
2603     }
2604 }
2605
2606 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2607    mode for memory reference represented by USE.  */
2608
2609 static GTY (()) vec<rtx, va_gc> *addr_list;
2610
2611 static bool
2612 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2613 {
2614   rtx reg, addr;
2615   unsigned list_index;
2616   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2617   machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2618
2619   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2620   if (list_index >= vec_safe_length (addr_list))
2621     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2622
2623   addr = (*addr_list)[list_index];
2624   if (!addr)
2625     {
2626       addr_mode = targetm.addr_space.address_mode (as);
2627       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2628       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2629       (*addr_list)[list_index] = addr;
2630     }
2631   else
2632     addr_mode = GET_MODE (addr);
2633
2634   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2635   return (memory_address_addr_space_p (mem_mode, addr, as));
2636 }
2637
2638 /* Comparison function to sort group in ascending order of addr_offset.  */
2639
2640 static int
2641 group_compare_offset (const void *a, const void *b)
2642 {
2643   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2644   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2645
2646   return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2647 }
2648
2649 /* Check if small groups should be split.  Return true if no group
2650    contains more than two uses with distinct addr_offsets.  Return
2651    false otherwise.  We want to split such groups because:
2652
2653      1) Small groups don't have much benefit and may interfer with
2654         general candidate selection.
2655      2) Size for problem with only small groups is usually small and
2656         general algorithm can handle it well.
2657
2658    TODO -- Above claim may not hold when we want to merge memory
2659    accesses with conseuctive addresses.  */
2660
2661 static bool
2662 split_small_address_groups_p (struct ivopts_data *data)
2663 {
2664   unsigned int i, j, distinct = 1;
2665   struct iv_use *pre;
2666   struct iv_group *group;
2667
2668   for (i = 0; i < data->vgroups.length (); i++)
2669     {
2670       group = data->vgroups[i];
2671       if (group->vuses.length () == 1)
2672         continue;
2673
2674       gcc_assert (address_p (group->type));
2675       if (group->vuses.length () == 2)
2676         {
2677           if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2678                                       group->vuses[1]->addr_offset) > 0)
2679             std::swap (group->vuses[0], group->vuses[1]);
2680         }
2681       else
2682         group->vuses.qsort (group_compare_offset);
2683
2684       if (distinct > 2)
2685         continue;
2686
2687       distinct = 1;
2688       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2689         {
2690           if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2691             {
2692               pre = group->vuses[j];
2693               distinct++;
2694             }
2695
2696           if (distinct > 2)
2697             break;
2698         }
2699     }
2700
2701   return (distinct <= 2);
2702 }
2703
2704 /* For each group of address type uses, this function further groups
2705    these uses according to the maximum offset supported by target's
2706    [base + offset] addressing mode.  */
2707
2708 static void
2709 split_address_groups (struct ivopts_data *data)
2710 {
2711   unsigned int i, j;
2712   /* Always split group.  */
2713   bool split_p = split_small_address_groups_p (data);
2714
2715   for (i = 0; i < data->vgroups.length (); i++)
2716     {
2717       struct iv_group *new_group = NULL;
2718       struct iv_group *group = data->vgroups[i];
2719       struct iv_use *use = group->vuses[0];
2720
2721       use->id = 0;
2722       use->group_id = group->id;
2723       if (group->vuses.length () == 1)
2724         continue;
2725
2726       gcc_assert (address_p (use->type));
2727
2728       for (j = 1; j < group->vuses.length ();)
2729         {
2730           struct iv_use *next = group->vuses[j];
2731           poly_int64 offset = next->addr_offset - use->addr_offset;
2732
2733           /* Split group if aksed to, or the offset against the first
2734              use can't fit in offset part of addressing mode.  IV uses
2735              having the same offset are still kept in one group.  */
2736           if (maybe_ne (offset, 0)
2737               && (split_p || !addr_offset_valid_p (use, offset)))
2738             {
2739               if (!new_group)
2740                 new_group = record_group (data, group->type);
2741               group->vuses.ordered_remove (j);
2742               new_group->vuses.safe_push (next);
2743               continue;
2744             }
2745
2746           next->id = j;
2747           next->group_id = group->id;
2748           j++;
2749         }
2750     }
2751 }
2752
2753 /* Finds uses of the induction variables that are interesting.  */
2754
2755 static void
2756 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2757 {
2758   basic_block bb;
2759   gimple_stmt_iterator bsi;
2760   unsigned i;
2761   edge e;
2762
2763   for (i = 0; i < data->current_loop->num_nodes; i++)
2764     {
2765       edge_iterator ei;
2766       bb = body[i];
2767
2768       FOR_EACH_EDGE (e, ei, bb->succs)
2769         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2770             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2771           find_interesting_uses_outside (data, e);
2772
2773       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2774         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2775       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2776         if (!is_gimple_debug (gsi_stmt (bsi)))
2777           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2778     }
2779
2780   split_address_groups (data);
2781
2782   if (dump_file && (dump_flags & TDF_DETAILS))
2783     {
2784       fprintf (dump_file, "\n<IV Groups>:\n");
2785       dump_groups (dump_file, data);
2786       fprintf (dump_file, "\n");
2787     }
2788 }
2789
2790 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2791    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2792    we are at the top-level of the processed address.  */
2793
2794 static tree
2795 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2796                 poly_int64 *offset)
2797 {
2798   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2799   enum tree_code code;
2800   tree type, orig_type = TREE_TYPE (expr);
2801   poly_int64 off0, off1;
2802   HOST_WIDE_INT st;
2803   tree orig_expr = expr;
2804
2805   STRIP_NOPS (expr);
2806
2807   type = TREE_TYPE (expr);
2808   code = TREE_CODE (expr);
2809   *offset = 0;
2810
2811   switch (code)
2812     {
2813     case POINTER_PLUS_EXPR:
2814     case PLUS_EXPR:
2815     case MINUS_EXPR:
2816       op0 = TREE_OPERAND (expr, 0);
2817       op1 = TREE_OPERAND (expr, 1);
2818
2819       op0 = strip_offset_1 (op0, false, false, &off0);
2820       op1 = strip_offset_1 (op1, false, false, &off1);
2821
2822       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2823       if (op0 == TREE_OPERAND (expr, 0)
2824           && op1 == TREE_OPERAND (expr, 1))
2825         return orig_expr;
2826
2827       if (integer_zerop (op1))
2828         expr = op0;
2829       else if (integer_zerop (op0))
2830         {
2831           if (code == MINUS_EXPR)
2832             {
2833               if (TYPE_OVERFLOW_UNDEFINED (type))
2834                 {
2835                   type = unsigned_type_for (type);
2836                   op1 = fold_convert (type, op1);
2837                 }
2838               expr = fold_build1 (NEGATE_EXPR, type, op1);
2839             }
2840           else
2841             expr = op1;
2842         }
2843       else
2844         {
2845           if (TYPE_OVERFLOW_UNDEFINED (type))
2846             {
2847               type = unsigned_type_for (type);
2848               if (code == POINTER_PLUS_EXPR)
2849                 code = PLUS_EXPR;
2850               op0 = fold_convert (type, op0);
2851               op1 = fold_convert (type, op1);
2852             }
2853           expr = fold_build2 (code, type, op0, op1);
2854         }
2855
2856       return fold_convert (orig_type, expr);
2857
2858     case MULT_EXPR:
2859       op1 = TREE_OPERAND (expr, 1);
2860       if (!cst_and_fits_in_hwi (op1))
2861         return orig_expr;
2862
2863       op0 = TREE_OPERAND (expr, 0);
2864       op0 = strip_offset_1 (op0, false, false, &off0);
2865       if (op0 == TREE_OPERAND (expr, 0))
2866         return orig_expr;
2867
2868       *offset = off0 * int_cst_value (op1);
2869       if (integer_zerop (op0))
2870         expr = op0;
2871       else
2872         {
2873           if (TYPE_OVERFLOW_UNDEFINED (type))
2874             {
2875               type = unsigned_type_for (type);
2876               op0 = fold_convert (type, op0);
2877               op1 = fold_convert (type, op1);
2878             }
2879           expr = fold_build2 (MULT_EXPR, type, op0, op1);
2880         }
2881
2882       return fold_convert (orig_type, expr);
2883
2884     case ARRAY_REF:
2885     case ARRAY_RANGE_REF:
2886       if (!inside_addr)
2887         return orig_expr;
2888
2889       step = array_ref_element_size (expr);
2890       if (!cst_and_fits_in_hwi (step))
2891         break;
2892
2893       st = int_cst_value (step);
2894       op1 = TREE_OPERAND (expr, 1);
2895       op1 = strip_offset_1 (op1, false, false, &off1);
2896       *offset = off1 * st;
2897
2898       if (top_compref
2899           && integer_zerop (op1))
2900         {
2901           /* Strip the component reference completely.  */
2902           op0 = TREE_OPERAND (expr, 0);
2903           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2904           *offset += off0;
2905           return op0;
2906         }
2907       break;
2908
2909     case COMPONENT_REF:
2910       {
2911         tree field;
2912
2913         if (!inside_addr)
2914           return orig_expr;
2915
2916         tmp = component_ref_field_offset (expr);
2917         field = TREE_OPERAND (expr, 1);
2918         if (top_compref
2919             && cst_and_fits_in_hwi (tmp)
2920             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2921           {
2922             HOST_WIDE_INT boffset, abs_off;
2923
2924             /* Strip the component reference completely.  */
2925             op0 = TREE_OPERAND (expr, 0);
2926             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2927             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2928             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2929             if (boffset < 0)
2930               abs_off = -abs_off;
2931
2932             *offset = off0 + int_cst_value (tmp) + abs_off;
2933             return op0;
2934           }
2935       }
2936       break;
2937
2938     case ADDR_EXPR:
2939       op0 = TREE_OPERAND (expr, 0);
2940       op0 = strip_offset_1 (op0, true, true, &off0);
2941       *offset += off0;
2942
2943       if (op0 == TREE_OPERAND (expr, 0))
2944         return orig_expr;
2945
2946       expr = build_fold_addr_expr (op0);
2947       return fold_convert (orig_type, expr);
2948
2949     case MEM_REF:
2950       /* ???  Offset operand?  */
2951       inside_addr = false;
2952       break;
2953
2954     default:
2955       if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2956         return build_int_cst (orig_type, 0);
2957       return orig_expr;
2958     }
2959
2960   /* Default handling of expressions for that we want to recurse into
2961      the first operand.  */
2962   op0 = TREE_OPERAND (expr, 0);
2963   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2964   *offset += off0;
2965
2966   if (op0 == TREE_OPERAND (expr, 0)
2967       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2968     return orig_expr;
2969
2970   expr = copy_node (expr);
2971   TREE_OPERAND (expr, 0) = op0;
2972   if (op1)
2973     TREE_OPERAND (expr, 1) = op1;
2974
2975   /* Inside address, we might strip the top level component references,
2976      thus changing type of the expression.  Handling of ADDR_EXPR
2977      will fix that.  */
2978   expr = fold_convert (orig_type, expr);
2979
2980   return expr;
2981 }
2982
2983 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2984
2985 static tree
2986 strip_offset (tree expr, poly_uint64 *offset)
2987 {
2988   poly_int64 off;
2989   tree core = strip_offset_1 (expr, false, false, &off);
2990   *offset = off;
2991   return core;
2992 }
2993
2994 /* Returns variant of TYPE that can be used as base for different uses.
2995    We return unsigned type with the same precision, which avoids problems
2996    with overflows.  */
2997
2998 static tree
2999 generic_type_for (tree type)
3000 {
3001   if (POINTER_TYPE_P (type))
3002     return unsigned_type_for (type);
3003
3004   if (TYPE_UNSIGNED (type))
3005     return type;
3006
3007   return unsigned_type_for (type);
3008 }
3009
3010 /* Private data for walk_tree.  */
3011
3012 struct walk_tree_data
3013 {
3014   bitmap *inv_vars;
3015   struct ivopts_data *idata;
3016 };
3017
3018 /* Callback function for walk_tree, it records invariants and symbol
3019    reference in *EXPR_P.  DATA is the structure storing result info.  */
3020
3021 static tree
3022 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
3023 {
3024   tree op = *expr_p;
3025   struct version_info *info;
3026   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3027
3028   if (TREE_CODE (op) != SSA_NAME)
3029     return NULL_TREE;
3030
3031   info = name_info (wdata->idata, op);
3032   /* Because we expand simple operations when finding IVs, loop invariant
3033      variable that isn't referred by the original loop could be used now.
3034      Record such invariant variables here.  */
3035   if (!info->iv)
3036     {
3037       struct ivopts_data *idata = wdata->idata;
3038       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3039
3040       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3041         {
3042           tree steptype = TREE_TYPE (op);
3043           if (POINTER_TYPE_P (steptype))
3044             steptype = sizetype;
3045           set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3046           record_invariant (idata, op, false);
3047         }
3048     }
3049   if (!info->inv_id || info->has_nonlin_use)
3050     return NULL_TREE;
3051
3052   if (!*wdata->inv_vars)
3053     *wdata->inv_vars = BITMAP_ALLOC (NULL);
3054   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3055
3056   return NULL_TREE;
3057 }
3058
3059 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
3060    store it.  */
3061
3062 static inline void
3063 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3064 {
3065   struct walk_tree_data wdata;
3066
3067   if (!inv_vars)
3068     return;
3069
3070   wdata.idata = data;
3071   wdata.inv_vars = inv_vars;
3072   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3073 }
3074
3075 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
3076    will be recorded if it doesn't exist yet.  Given below two exprs:
3077      inv_expr + cst1, inv_expr + cst2
3078    It's hard to make decision whether constant part should be stripped
3079    or not.  We choose to not strip based on below facts:
3080      1) We need to count ADD cost for constant part if it's stripped,
3081         which isn't always trivial where this functions is called.
3082      2) Stripping constant away may be conflict with following loop
3083         invariant hoisting pass.
3084      3) Not stripping constant away results in more invariant exprs,
3085         which usually leads to decision preferring lower reg pressure.  */
3086
3087 static iv_inv_expr_ent *
3088 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3089 {
3090   STRIP_NOPS (inv_expr);
3091
3092   if (poly_int_tree_p (inv_expr)
3093       || TREE_CODE (inv_expr) == SSA_NAME)
3094     return NULL;
3095
3096   /* Don't strip constant part away as we used to.  */
3097
3098   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
3099   struct iv_inv_expr_ent ent;
3100   ent.expr = inv_expr;
3101   ent.hash = iterative_hash_expr (inv_expr, 0);
3102   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3103
3104   if (!*slot)
3105     {
3106       *slot = XNEW (struct iv_inv_expr_ent);
3107       (*slot)->expr = inv_expr;
3108       (*slot)->hash = ent.hash;
3109       (*slot)->id = ++data->max_inv_expr_id;
3110     }
3111
3112   return *slot;
3113 }
3114
3115
3116 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3117    unsuitable as ivopts candidates for potentially involving undefined
3118    behavior.  */
3119
3120 static tree
3121 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3122 {
3123   basic_block bb = (basic_block) bb_;
3124   if (TREE_CODE (*tp) == SSA_NAME
3125       && ssa_name_maybe_undef_p (*tp)
3126       && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3127     return *tp;
3128   if (!EXPR_P (*tp))
3129     *walk_subtrees = 0;
3130   return NULL;
3131 }
3132
3133 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3134    position to POS.  If USE is not NULL, the candidate is set as related to
3135    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
3136    replacement of the final value of the iv by a direct computation.  */
3137
3138 static struct iv_cand *
3139 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3140                  enum iv_position pos, struct iv_use *use,
3141                  gimple *incremented_at, struct iv *orig_iv = NULL,
3142                  bool doloop = false)
3143 {
3144   unsigned i;
3145   struct iv_cand *cand = NULL;
3146   tree type, orig_type;
3147
3148   gcc_assert (base && step);
3149
3150   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3151      live, but the ivopts code may replace a real pointer with one
3152      pointing before or after the memory block that is then adjusted
3153      into the memory block during the loop.  FIXME: It would likely be
3154      better to actually force the pointer live and still use ivopts;
3155      for example, it would be enough to write the pointer into memory
3156      and keep it there until after the loop.  */
3157   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3158     return NULL;
3159
3160   /* If BASE contains undefined SSA names make sure we only record
3161      the original IV.  */
3162   bool involves_undefs = false;
3163   if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3164     {
3165       if (pos != IP_ORIGINAL)
3166         return NULL;
3167       important = false;
3168       involves_undefs = true;
3169     }
3170
3171   /* For non-original variables, make sure their values are computed in a type
3172      that does not invoke undefined behavior on overflows (since in general,
3173      we cannot prove that these induction variables are non-wrapping).  */
3174   if (pos != IP_ORIGINAL)
3175     {
3176       orig_type = TREE_TYPE (base);
3177       type = generic_type_for (orig_type);
3178       if (type != orig_type)
3179         {
3180           base = fold_convert (type, base);
3181           step = fold_convert (type, step);
3182         }
3183     }
3184
3185   for (i = 0; i < data->vcands.length (); i++)
3186     {
3187       cand = data->vcands[i];
3188
3189       if (cand->pos != pos)
3190         continue;
3191
3192       if (cand->incremented_at != incremented_at
3193           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3194               && cand->ainc_use != use))
3195         continue;
3196
3197       if (operand_equal_p (base, cand->iv->base, 0)
3198           && operand_equal_p (step, cand->iv->step, 0)
3199           && (TYPE_PRECISION (TREE_TYPE (base))
3200               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3201         break;
3202     }
3203
3204   if (i == data->vcands.length ())
3205     {
3206       cand = XCNEW (struct iv_cand);
3207       cand->id = i;
3208       cand->iv = alloc_iv (data, base, step);
3209       cand->pos = pos;
3210       if (pos != IP_ORIGINAL)
3211         {
3212           if (doloop)
3213             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3214           else
3215             cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3216           cand->var_after = cand->var_before;
3217         }
3218       cand->important = important;
3219       cand->involves_undefs = involves_undefs;
3220       cand->incremented_at = incremented_at;
3221       cand->doloop_p = doloop;
3222       data->vcands.safe_push (cand);
3223
3224       if (!poly_int_tree_p (step))
3225         {
3226           find_inv_vars (data, &step, &cand->inv_vars);
3227
3228           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3229           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3230           if (inv_expr != NULL)
3231             {
3232               cand->inv_exprs = cand->inv_vars;
3233               cand->inv_vars = NULL;
3234               if (cand->inv_exprs)
3235                 bitmap_clear (cand->inv_exprs);
3236               else
3237                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3238
3239               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3240             }
3241         }
3242
3243       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3244         cand->ainc_use = use;
3245       else
3246         cand->ainc_use = NULL;
3247
3248       cand->orig_iv = orig_iv;
3249       if (dump_file && (dump_flags & TDF_DETAILS))
3250         dump_cand (dump_file, cand);
3251     }
3252
3253   cand->important |= important;
3254   cand->doloop_p |= doloop;
3255
3256   /* Relate candidate to the group for which it is added.  */
3257   if (use)
3258     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3259
3260   return cand;
3261 }
3262
3263 /* Returns true if incrementing the induction variable at the end of the LOOP
3264    is allowed.
3265
3266    The purpose is to avoid splitting latch edge with a biv increment, thus
3267    creating a jump, possibly confusing other optimization passes and leaving
3268    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3269    available (so we do not have a better alternative), or if the latch edge
3270    is already nonempty.  */
3271
3272 static bool
3273 allow_ip_end_pos_p (class loop *loop)
3274 {
3275   if (!ip_normal_pos (loop))
3276     return true;
3277
3278   if (!empty_block_p (ip_end_pos (loop)))
3279     return true;
3280
3281   return false;
3282 }
3283
3284 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3285    Important field is set to IMPORTANT.  */
3286
3287 static void
3288 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3289                         bool important, struct iv_use *use)
3290 {
3291   basic_block use_bb = gimple_bb (use->stmt);
3292   machine_mode mem_mode;
3293   unsigned HOST_WIDE_INT cstepi;
3294
3295   /* If we insert the increment in any position other than the standard
3296      ones, we must ensure that it is incremented once per iteration.
3297      It must not be in an inner nested loop, or one side of an if
3298      statement.  */
3299   if (use_bb->loop_father != data->current_loop
3300       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3301       || stmt_can_throw_internal (cfun, use->stmt)
3302       || !cst_and_fits_in_hwi (step))
3303     return;
3304
3305   cstepi = int_cst_value (step);
3306
3307   mem_mode = TYPE_MODE (use->mem_type);
3308   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3309         || USE_STORE_PRE_INCREMENT (mem_mode))
3310        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3311       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3312            || USE_STORE_PRE_DECREMENT (mem_mode))
3313           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3314     {
3315       enum tree_code code = MINUS_EXPR;
3316       tree new_base;
3317       tree new_step = step;
3318
3319       if (POINTER_TYPE_P (TREE_TYPE (base)))
3320         {
3321           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3322           code = POINTER_PLUS_EXPR;
3323         }
3324       else
3325         new_step = fold_convert (TREE_TYPE (base), new_step);
3326       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3327       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3328                        use->stmt);
3329     }
3330   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3331         || USE_STORE_POST_INCREMENT (mem_mode))
3332        && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3333       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3334            || USE_STORE_POST_DECREMENT (mem_mode))
3335           && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3336     {
3337       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3338                        use->stmt);
3339     }
3340 }
3341
3342 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3343    position to POS.  If USE is not NULL, the candidate is set as related to
3344    it.  The candidate computation is scheduled before exit condition and at
3345    the end of loop.  */
3346
3347 static void
3348 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3349                struct iv_use *use, struct iv *orig_iv = NULL,
3350                bool doloop = false)
3351 {
3352   if (ip_normal_pos (data->current_loop))
3353     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3354                      doloop);
3355   /* Exclude doloop candidate here since it requires decrement then comparison
3356      and jump, the IP_END position doesn't match.  */
3357   if (!doloop && ip_end_pos (data->current_loop)
3358       && allow_ip_end_pos_p (data->current_loop))
3359     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3360 }
3361
3362 /* Adds standard iv candidates.  */
3363
3364 static void
3365 add_standard_iv_candidates (struct ivopts_data *data)
3366 {
3367   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3368
3369   /* The same for a double-integer type if it is still fast enough.  */
3370   if (TYPE_PRECISION
3371         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3372       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3373     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3374                    build_int_cst (long_integer_type_node, 1), true, NULL);
3375
3376   /* The same for a double-integer type if it is still fast enough.  */
3377   if (TYPE_PRECISION
3378         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3379       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3380     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3381                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3382 }
3383
3384
3385 /* Adds candidates bases on the old induction variable IV.  */
3386
3387 static void
3388 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3389 {
3390   gimple *phi;
3391   tree def;
3392   struct iv_cand *cand;
3393
3394   /* Check if this biv is used in address type use.  */
3395   if (iv->no_overflow  && iv->have_address_use
3396       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3397       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3398     {
3399       tree base = fold_convert (sizetype, iv->base);
3400       tree step = fold_convert (sizetype, iv->step);
3401
3402       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3403       add_candidate (data, base, step, true, NULL, iv);
3404       /* Add iv cand of the original type only if it has nonlinear use.  */
3405       if (iv->nonlin_use)
3406         add_candidate (data, iv->base, iv->step, true, NULL);
3407     }
3408   else
3409     add_candidate (data, iv->base, iv->step, true, NULL);
3410
3411   /* The same, but with initial value zero.  */
3412   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3413     add_candidate (data, size_int (0), iv->step, true, NULL);
3414   else
3415     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3416                    iv->step, true, NULL);
3417
3418   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3419   if (gimple_code (phi) == GIMPLE_PHI)
3420     {
3421       /* Additionally record the possibility of leaving the original iv
3422          untouched.  */
3423       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3424       /* Don't add candidate if it's from another PHI node because
3425          it's an affine iv appearing in the form of PEELED_CHREC.  */
3426       phi = SSA_NAME_DEF_STMT (def);
3427       if (gimple_code (phi) != GIMPLE_PHI)
3428         {
3429           cand = add_candidate_1 (data,
3430                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3431                                   SSA_NAME_DEF_STMT (def));
3432           if (cand)
3433             {
3434               cand->var_before = iv->ssa_name;
3435               cand->var_after = def;
3436             }
3437         }
3438       else
3439         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3440     }
3441 }
3442
3443 /* Adds candidates based on the old induction variables.  */
3444
3445 static void
3446 add_iv_candidate_for_bivs (struct ivopts_data *data)
3447 {
3448   unsigned i;
3449   struct iv *iv;
3450   bitmap_iterator bi;
3451
3452   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3453     {
3454       iv = ver_info (data, i)->iv;
3455       if (iv && iv->biv_p && !integer_zerop (iv->step))
3456         add_iv_candidate_for_biv (data, iv);
3457     }
3458 }
3459
3460 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3461
3462 static void
3463 record_common_cand (struct ivopts_data *data, tree base,
3464                     tree step, struct iv_use *use)
3465 {
3466   class iv_common_cand ent;
3467   class iv_common_cand **slot;
3468
3469   ent.base = base;
3470   ent.step = step;
3471   ent.hash = iterative_hash_expr (base, 0);
3472   ent.hash = iterative_hash_expr (step, ent.hash);
3473
3474   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3475   if (*slot == NULL)
3476     {
3477       *slot = new iv_common_cand ();
3478       (*slot)->base = base;
3479       (*slot)->step = step;
3480       (*slot)->uses.create (8);
3481       (*slot)->hash = ent.hash;
3482       data->iv_common_cands.safe_push ((*slot));
3483     }
3484
3485   gcc_assert (use != NULL);
3486   (*slot)->uses.safe_push (use);
3487   return;
3488 }
3489
3490 /* Comparison function used to sort common candidates.  */
3491
3492 static int
3493 common_cand_cmp (const void *p1, const void *p2)
3494 {
3495   unsigned n1, n2;
3496   const class iv_common_cand *const *const ccand1
3497     = (const class iv_common_cand *const *)p1;
3498   const class iv_common_cand *const *const ccand2
3499     = (const class iv_common_cand *const *)p2;
3500
3501   n1 = (*ccand1)->uses.length ();
3502   n2 = (*ccand2)->uses.length ();
3503   return n2 - n1;
3504 }
3505
3506 /* Adds IV candidates based on common candidated recorded.  */
3507
3508 static void
3509 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3510 {
3511   unsigned i, j;
3512   struct iv_cand *cand_1, *cand_2;
3513
3514   data->iv_common_cands.qsort (common_cand_cmp);
3515   for (i = 0; i < data->iv_common_cands.length (); i++)
3516     {
3517       class iv_common_cand *ptr = data->iv_common_cands[i];
3518
3519       /* Only add IV candidate if it's derived from multiple uses.  */
3520       if (ptr->uses.length () <= 1)
3521         break;
3522
3523       cand_1 = NULL;
3524       cand_2 = NULL;
3525       if (ip_normal_pos (data->current_loop))
3526         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3527                                   false, IP_NORMAL, NULL, NULL);
3528
3529       if (ip_end_pos (data->current_loop)
3530           && allow_ip_end_pos_p (data->current_loop))
3531         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3532                                   false, IP_END, NULL, NULL);
3533
3534       /* Bind deriving uses and the new candidates.  */
3535       for (j = 0; j < ptr->uses.length (); j++)
3536         {
3537           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3538           if (cand_1)
3539             bitmap_set_bit (group->related_cands, cand_1->id);
3540           if (cand_2)
3541             bitmap_set_bit (group->related_cands, cand_2->id);
3542         }
3543     }
3544
3545   /* Release data since it is useless from this point.  */
3546   data->iv_common_cand_tab->empty ();
3547   data->iv_common_cands.truncate (0);
3548 }
3549
3550 /* Adds candidates based on the value of USE's iv.  */
3551
3552 static void
3553 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3554 {
3555   poly_uint64 offset;
3556   tree base;
3557   struct iv *iv = use->iv;
3558   tree basetype = TREE_TYPE (iv->base);
3559
3560   /* Don't add candidate for iv_use with non integer, pointer or non-mode
3561      precision types, instead, add candidate for the corresponding scev in
3562      unsigned type with the same precision.  See PR93674 for more info.  */
3563   if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3564       || !type_has_mode_precision_p (basetype))
3565     {
3566       basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3567                                                  TYPE_UNSIGNED (basetype));
3568       add_candidate (data, fold_convert (basetype, iv->base),
3569                      fold_convert (basetype, iv->step), false, NULL);
3570       return;
3571     }
3572
3573   add_candidate (data, iv->base, iv->step, false, use);
3574
3575   /* Record common candidate for use in case it can be shared by others.  */
3576   record_common_cand (data, iv->base, iv->step, use);
3577
3578   /* Record common candidate with initial value zero.  */
3579   basetype = TREE_TYPE (iv->base);
3580   if (POINTER_TYPE_P (basetype))
3581     basetype = sizetype;
3582   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3583
3584   /* Compare the cost of an address with an unscaled index with the cost of
3585     an address with a scaled index and add candidate if useful.  */
3586   poly_int64 step;
3587   if (use != NULL
3588       && poly_int_tree_p (iv->step, &step)
3589       && address_p (use->type))
3590     {
3591       poly_int64 new_step;
3592       unsigned int fact = preferred_mem_scale_factor
3593         (use->iv->base,
3594          TYPE_MODE (use->mem_type),
3595          optimize_loop_for_speed_p (data->current_loop));
3596
3597       if (fact != 1
3598           && multiple_p (step, fact, &new_step))
3599         add_candidate (data, size_int (0),
3600                        wide_int_to_tree (sizetype, new_step),
3601                        true, NULL);
3602     }
3603
3604   /* Record common candidate with constant offset stripped in base.
3605      Like the use itself, we also add candidate directly for it.  */
3606   base = strip_offset (iv->base, &offset);
3607   if (maybe_ne (offset, 0U) || base != iv->base)
3608     {
3609       record_common_cand (data, base, iv->step, use);
3610       add_candidate (data, base, iv->step, false, use);
3611     }
3612
3613   /* Record common candidate with base_object removed in base.  */
3614   base = iv->base;
3615   STRIP_NOPS (base);
3616   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3617     {
3618       tree step = iv->step;
3619
3620       STRIP_NOPS (step);
3621       base = TREE_OPERAND (base, 1);
3622       step = fold_convert (sizetype, step);
3623       record_common_cand (data, base, step, use);
3624       /* Also record common candidate with offset stripped.  */
3625       tree alt_base, alt_offset;
3626       split_constant_offset (base, &alt_base, &alt_offset);
3627       if (!integer_zerop (alt_offset))
3628         record_common_cand (data, alt_base, step, use);
3629     }
3630
3631   /* At last, add auto-incremental candidates.  Make such variables
3632      important since other iv uses with same base object may be based
3633      on it.  */
3634   if (use != NULL && address_p (use->type))
3635     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3636 }
3637
3638 /* Adds candidates based on the uses.  */
3639
3640 static void
3641 add_iv_candidate_for_groups (struct ivopts_data *data)
3642 {
3643   unsigned i;
3644
3645   /* Only add candidate for the first use in group.  */
3646   for (i = 0; i < data->vgroups.length (); i++)
3647     {
3648       struct iv_group *group = data->vgroups[i];
3649
3650       gcc_assert (group->vuses[0] != NULL);
3651       add_iv_candidate_for_use (data, group->vuses[0]);
3652     }
3653   add_iv_candidate_derived_from_uses (data);
3654 }
3655
3656 /* Record important candidates and add them to related_cands bitmaps.  */
3657
3658 static void
3659 record_important_candidates (struct ivopts_data *data)
3660 {
3661   unsigned i;
3662   struct iv_group *group;
3663
3664   for (i = 0; i < data->vcands.length (); i++)
3665     {
3666       struct iv_cand *cand = data->vcands[i];
3667
3668       if (cand->important)
3669         bitmap_set_bit (data->important_candidates, i);
3670     }
3671
3672   data->consider_all_candidates = (data->vcands.length ()
3673                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3674
3675   /* Add important candidates to groups' related_cands bitmaps.  */
3676   for (i = 0; i < data->vgroups.length (); i++)
3677     {
3678       group = data->vgroups[i];
3679       bitmap_ior_into (group->related_cands, data->important_candidates);
3680     }
3681 }
3682
3683 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3684    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3685    we allocate a simple list to every use.  */
3686
3687 static void
3688 alloc_use_cost_map (struct ivopts_data *data)
3689 {
3690   unsigned i, size, s;
3691
3692   for (i = 0; i < data->vgroups.length (); i++)
3693     {
3694       struct iv_group *group = data->vgroups[i];
3695
3696       if (data->consider_all_candidates)
3697         size = data->vcands.length ();
3698       else
3699         {
3700           s = bitmap_count_bits (group->related_cands);
3701
3702           /* Round up to the power of two, so that moduling by it is fast.  */
3703           size = s ? (1 << ceil_log2 (s)) : 1;
3704         }
3705
3706       group->n_map_members = size;
3707       group->cost_map = XCNEWVEC (class cost_pair, size);
3708     }
3709 }
3710
3711 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3712    on invariants INV_VARS and that the value used in expressing it is
3713    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3714
3715 static void
3716 set_group_iv_cost (struct ivopts_data *data,
3717                    struct iv_group *group, struct iv_cand *cand,
3718                    comp_cost cost, bitmap inv_vars, tree value,
3719                    enum tree_code comp, bitmap inv_exprs)
3720 {
3721   unsigned i, s;
3722
3723   if (cost.infinite_cost_p ())
3724     {
3725       BITMAP_FREE (inv_vars);
3726       BITMAP_FREE (inv_exprs);
3727       return;
3728     }
3729
3730   if (data->consider_all_candidates)
3731     {
3732       group->cost_map[cand->id].cand = cand;
3733       group->cost_map[cand->id].cost = cost;
3734       group->cost_map[cand->id].inv_vars = inv_vars;
3735       group->cost_map[cand->id].inv_exprs = inv_exprs;
3736       group->cost_map[cand->id].value = value;
3737       group->cost_map[cand->id].comp = comp;
3738       return;
3739     }
3740
3741   /* n_map_members is a power of two, so this computes modulo.  */
3742   s = cand->id & (group->n_map_members - 1);
3743   for (i = s; i < group->n_map_members; i++)
3744     if (!group->cost_map[i].cand)
3745       goto found;
3746   for (i = 0; i < s; i++)
3747     if (!group->cost_map[i].cand)
3748       goto found;
3749
3750   gcc_unreachable ();
3751
3752 found:
3753   group->cost_map[i].cand = cand;
3754   group->cost_map[i].cost = cost;
3755   group->cost_map[i].inv_vars = inv_vars;
3756   group->cost_map[i].inv_exprs = inv_exprs;
3757   group->cost_map[i].value = value;
3758   group->cost_map[i].comp = comp;
3759 }
3760
3761 /* Gets cost of (GROUP, CAND) pair.  */
3762
3763 static class cost_pair *
3764 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3765                    struct iv_cand *cand)
3766 {
3767   unsigned i, s;
3768   class cost_pair *ret;
3769
3770   if (!cand)
3771     return NULL;
3772
3773   if (data->consider_all_candidates)
3774     {
3775       ret = group->cost_map + cand->id;
3776       if (!ret->cand)
3777         return NULL;
3778
3779       return ret;
3780     }
3781
3782   /* n_map_members is a power of two, so this computes modulo.  */
3783   s = cand->id & (group->n_map_members - 1);
3784   for (i = s; i < group->n_map_members; i++)
3785     if (group->cost_map[i].cand == cand)
3786       return group->cost_map + i;
3787     else if (group->cost_map[i].cand == NULL)
3788       return NULL;
3789   for (i = 0; i < s; i++)
3790     if (group->cost_map[i].cand == cand)
3791       return group->cost_map + i;
3792     else if (group->cost_map[i].cand == NULL)
3793       return NULL;
3794
3795   return NULL;
3796 }
3797
3798 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3799 static rtx
3800 produce_memory_decl_rtl (tree obj, int *regno)
3801 {
3802   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3803   machine_mode address_mode = targetm.addr_space.address_mode (as);
3804   rtx x;
3805
3806   gcc_assert (obj);
3807   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3808     {
3809       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3810       x = gen_rtx_SYMBOL_REF (address_mode, name);
3811       SET_SYMBOL_REF_DECL (x, obj);
3812       x = gen_rtx_MEM (DECL_MODE (obj), x);
3813       set_mem_addr_space (x, as);
3814       targetm.encode_section_info (obj, x, true);
3815     }
3816   else
3817     {
3818       x = gen_raw_REG (address_mode, (*regno)++);
3819       x = gen_rtx_MEM (DECL_MODE (obj), x);
3820       set_mem_addr_space (x, as);
3821     }
3822
3823   return x;
3824 }
3825
3826 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3827    walk_tree.  DATA contains the actual fake register number.  */
3828
3829 static tree
3830 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3831 {
3832   tree obj = NULL_TREE;
3833   rtx x = NULL_RTX;
3834   int *regno = (int *) data;
3835
3836   switch (TREE_CODE (*expr_p))
3837     {
3838     case ADDR_EXPR:
3839       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3840            handled_component_p (*expr_p);
3841            expr_p = &TREE_OPERAND (*expr_p, 0))
3842         continue;
3843       obj = *expr_p;
3844       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3845         x = produce_memory_decl_rtl (obj, regno);
3846       break;
3847
3848     case SSA_NAME:
3849       *ws = 0;
3850       obj = SSA_NAME_VAR (*expr_p);
3851       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3852       if (!obj)
3853         return NULL_TREE;
3854       if (!DECL_RTL_SET_P (obj))
3855         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3856       break;
3857
3858     case VAR_DECL:
3859     case PARM_DECL:
3860     case RESULT_DECL:
3861       *ws = 0;
3862       obj = *expr_p;
3863
3864       if (DECL_RTL_SET_P (obj))
3865         break;
3866
3867       if (DECL_MODE (obj) == BLKmode)
3868         x = produce_memory_decl_rtl (obj, regno);
3869       else
3870         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3871
3872       break;
3873
3874     default:
3875       break;
3876     }
3877
3878   if (x)
3879     {
3880       decl_rtl_to_reset.safe_push (obj);
3881       SET_DECL_RTL (obj, x);
3882     }
3883
3884   return NULL_TREE;
3885 }
3886
3887 /* Predict whether the given loop will be transformed in the RTL
3888    doloop_optimize pass.  Attempt to duplicate some doloop_optimize checks.
3889    This is only for target independent checks, see targetm.predict_doloop_p
3890    for the target dependent ones.
3891
3892    Note that according to some initial investigation, some checks like costly
3893    niter check and invalid stmt scanning don't have much gains among general
3894    cases, so keep this as simple as possible first.
3895
3896    Some RTL specific checks seems unable to be checked in gimple, if any new
3897    checks or easy checks _are_ missing here, please add them.  */
3898
3899 static bool
3900 generic_predict_doloop_p (struct ivopts_data *data)
3901 {
3902   class loop *loop = data->current_loop;
3903
3904   /* Call target hook for target dependent checks.  */
3905   if (!targetm.predict_doloop_p (loop))
3906     {
3907       if (dump_file && (dump_flags & TDF_DETAILS))
3908         fprintf (dump_file, "Predict doloop failure due to"
3909                             " target specific checks.\n");
3910       return false;
3911     }
3912
3913   /* Similar to doloop_optimize, check iteration description to know it's
3914      suitable or not.  Keep it as simple as possible, feel free to extend it
3915      if you find any multiple exits cases matter.  */
3916   edge exit = single_dom_exit (loop);
3917   class tree_niter_desc *niter_desc;
3918   if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3919     {
3920       if (dump_file && (dump_flags & TDF_DETAILS))
3921         fprintf (dump_file, "Predict doloop failure due to"
3922                             " unexpected niters.\n");
3923       return false;
3924     }
3925
3926   /* Similar to doloop_optimize, check whether iteration count too small
3927      and not profitable.  */
3928   HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3929   if (est_niter == -1)
3930     est_niter = get_likely_max_loop_iterations_int (loop);
3931   if (est_niter >= 0 && est_niter < 3)
3932     {
3933       if (dump_file && (dump_flags & TDF_DETAILS))
3934         fprintf (dump_file,
3935                  "Predict doloop failure due to"
3936                  " too few iterations (%u).\n",
3937                  (unsigned int) est_niter);
3938       return false;
3939     }
3940
3941   return true;
3942 }
3943
3944 /* Determines cost of the computation of EXPR.  SPEED is passed on to
        seq_cost, address_cost and set_src_cost and is also stored in
        crtl->maybe_hot_insn_p for the duration of the expansion (the caller
        in force_expr_to_var_cost labels SPEED == 0 as "size" and SPEED == 1
        as "speed").

        EXPR is expanded to RTL into an insn sequence of its own.  The result
        is the cost of that sequence, plus the cost of the rtx the expansion
        produced unless that rtx is a plain register.  */
3945
3946 static unsigned
3947 computation_cost (tree expr, bool speed)
3948 {
3949   rtx_insn *seq;
3950   rtx rslt;
3951   tree type = TREE_TYPE (expr);
3952   unsigned cost;
3953   /* Avoid using hard regs in ways which may be unsupported.  */
3954   int regno = LAST_VIRTUAL_REGISTER + 1;
3955   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3956   enum node_frequency real_frequency = node->frequency;
3957
       /* Do the expansion with the function's frequency forced to normal; the
          real frequency saved above is put back once expansion is done.  */
3958   node->frequency = NODE_FREQUENCY_NORMAL;
3959   crtl->maybe_hot_insn_p = speed;
       /* Give the decls referenced from EXPR fake DECL_RTL (numbered from
          REGNO on) so that the expression can be expanded.  */
3960   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3961   start_sequence ();
3962   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3963   seq = get_insns ();
3964   end_sequence ();
       /* NOTE(review): presumably undoes the maybe_hot_insn_p override made
          above -- confirm against default_rtl_profile.  */
3965   default_rtl_profile ();
3966   node->frequency = real_frequency;
3967
3968   cost = seq_cost (seq, speed);
       /* On top of the insns, charge for the form of the result: a MEM costs
          its address, anything else that is not a register costs what it
          takes to use it as the source of a set.  */
3969   if (MEM_P (rslt))
3970     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3971                           TYPE_ADDR_SPACE (type), speed);
3972   else if (!REG_P (rslt))
3973     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3974
3975   return cost;
3976 }
3977
3978 /* Returns variable containing the value of candidate CAND at statement AT.  */
3979
3980 static tree
3981 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3982 {
3983   if (stmt_after_increment (loop, cand, stmt))
3984     return cand->var_after;
3985   else
3986     return cand->var_before;
3987 }
3988
3989 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3990    same precision that is at least as wide as the precision of TYPE, stores
3991    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3992    type of A and B.  */
3993
3994 static tree
3995 determine_common_wider_type (tree *a, tree *b)
3996 {
3997   tree wider_type = NULL;
3998   tree suba, subb;
3999   tree atype = TREE_TYPE (*a);
4000
4001   if (CONVERT_EXPR_P (*a))
4002     {
4003       suba = TREE_OPERAND (*a, 0);
4004       wider_type = TREE_TYPE (suba);
4005       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
4006         return atype;
4007     }
4008   else
4009     return atype;
4010
4011   if (CONVERT_EXPR_P (*b))
4012     {
4013       subb = TREE_OPERAND (*b, 0);
4014       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
4015         return atype;
4016     }
4017   else
4018     return atype;
4019
4020   *a = suba;
4021   *b = subb;
4022   return wider_type;
4023 }
4024
4025 /* Determines the expression by that USE is expressed from induction variable
4026    CAND at statement AT in LOOP.  The expression is stored in two parts in a
4027    decomposed form.  The invariant part is stored in AFF_INV; while variant
4028    part in AFF_VAR.  If PRAT is non-null, store in it the ratio RAT that is
        applied to the candidate in the formula used below
        (use = ubase - RAT * cbase + RAT * var).  Returns false if USE cannot
        be expressed using CAND; AFF_INV, AFF_VAR and *PRAT are not written in
        that case.  */
4030
4031 static bool
4032 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
4033                        struct iv_cand *cand, class aff_tree *aff_inv,
4034                        class aff_tree *aff_var, widest_int *prat = NULL)
4035 {
4036   tree ubase = use->iv->base, ustep = use->iv->step;
4037   tree cbase = cand->iv->base, cstep = cand->iv->step;
4038   tree common_type, uutype, var, cstep_common;
4039   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4040   aff_tree aff_cbase;
4041   widest_int rat;
4042
4043   /* We must have a precision to express the values of use.  */
4044   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4045     return false;
4046
4047   var = var_at_stmt (loop, cand, at);
       /* All the arithmetic below is carried out in the unsigned variant of
          the type of USE.  */
4048   uutype = unsigned_type_for (utype);
4049
4050   /* If the conversion is not noop, perform it.  */
4051   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4052     {
4053       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4054           && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4055         {
4056           tree inner_base, inner_step, inner_type;
4057           inner_base = TREE_OPERAND (cbase, 0);
4058           if (CONVERT_EXPR_P (cstep))
4059             inner_step = TREE_OPERAND (cstep, 0);
4060           else
4061             inner_step = cstep;
4062
4063           inner_type = TREE_TYPE (inner_base);
4064           /* If candidate is added from a biv whose type is smaller than
4065              ctype, we know both candidate and the biv won't overflow.
4066              In this case, it's safe to skip the conversion in candidate.
4067              As an example, (unsigned short)((unsigned long)A) equals to
4068              (unsigned short)A, if A has a type no larger than short.  */
4069           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4070             {
4071               cbase = inner_base;
4072               cstep = inner_step;
4073             }
4074         }
4075       cbase = fold_convert (uutype, cbase);
4076       cstep = fold_convert (uutype, cstep);
4077       var = fold_convert (uutype, var);
4078     }
4079
4080   /* Ratio is 1 when computing the value of biv cand by itself.
4081      We can't rely on constant_multiple_of in this case because the
4082      use is created after the original biv is selected.  The call
4083      could fail because of inconsistent fold behavior.  See PR68021
4084      for more information.  */
4085   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4086     {
4087       gcc_assert (is_gimple_assign (use->stmt));
4088       gcc_assert (use->iv->ssa_name == cand->var_after);
4089       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4090       rat = 1;
4091     }
4092   else if (!constant_multiple_of (ustep, cstep, &rat))
4093     return false;
4094
4095   if (prat)
4096     *prat = rat;
4097
4098   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4099      type, we achieve better folding by computing their difference in this
4100      wider type, and cast the result to UUTYPE.  We do not need to worry about
4101      overflows, as all the arithmetics will in the end be performed in UUTYPE
4102      anyway.  */
4103   common_type = determine_common_wider_type (&ubase, &cbase);
4104
4105   /* use = ubase - ratio * cbase + ratio * var.  */
4106   tree_to_aff_combination (ubase, common_type, aff_inv);
4107   tree_to_aff_combination (cbase, common_type, &aff_cbase);
4108   tree_to_aff_combination (var, uutype, aff_var);
4109
4110   /* We need to shift the value if we are after the increment.  */
4111   if (stmt_after_increment (loop, cand, at))
4112     {
4113       aff_tree cstep_aff;
4114
           /* The step is added to CBASE, which is represented in
              COMMON_TYPE, so bring the step to that type first.  */
4115       if (common_type != uutype)
4116         cstep_common = fold_convert (common_type, cstep);
4117       else
4118         cstep_common = cstep;
4119
4120       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4121       aff_combination_add (&aff_cbase, &cstep_aff);
4122     }
4123
       /* Invariant part: ubase - ratio * cbase, converted back to UUTYPE if
          it was computed in the wider common type.  */
4124   aff_combination_scale (&aff_cbase, -rat);
4125   aff_combination_add (aff_inv, &aff_cbase);
4126   if (common_type != uutype)
4127     aff_combination_convert (aff_inv, uutype);
4128
       /* Variant part: ratio * var.  */
4129   aff_combination_scale (aff_var, rat);
4130   return true;
4131 }
4132
4133 /* Determines the expression by that USE is expressed from induction variable
4134    CAND at statement AT in LOOP.  The expression is stored in a decomposed
4135    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
4136
4137 static bool
4138 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4139                      struct iv_cand *cand, class aff_tree *aff)
4140 {
4141   aff_tree aff_var;
4142
4143   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4144     return false;
4145
4146   aff_combination_add (aff, &aff_var);
4147   return true;
4148 }
4149
4150 /* Return the type of USE.  */
4151
4152 static tree
4153 get_use_type (struct iv_use *use)
4154 {
4155   tree base_type = TREE_TYPE (use->iv->base);
4156   tree type;
4157
4158   if (use->type == USE_REF_ADDRESS)
4159     {
4160       /* The base_type may be a void pointer.  Create a pointer type based on
4161          the mem_ref instead.  */
4162       type = build_pointer_type (TREE_TYPE (*use->op_p));
4163       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4164                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4165     }
4166   else
4167     type = base_type;
4168
4169   return type;
4170 }
4171
4172 /* Determines the expression by that USE is expressed from induction variable
4173    CAND at statement AT in LOOP.  The computation is unshared.  */
4174
4175 static tree
4176 get_computation_at (class loop *loop, gimple *at,
4177                     struct iv_use *use, struct iv_cand *cand)
4178 {
4179   aff_tree aff;
4180   tree type = get_use_type (use);
4181
4182   if (!get_computation_aff (loop, at, use, cand, &aff))
4183     return NULL_TREE;
4184   unshare_aff_combination (&aff);
4185   return fold_convert (type, aff_combination_to_tree (&aff));
4186 }
4187
4188 /* Like get_computation_at, but try harder, even if the computation
4189    is more expensive.  Intended for debug stmts.

        get_computation_at is tried first.  If it fails, and the type of USE
        is strictly narrower than the type of CAND, an expression of the form
        ubase + (var - cbase) / ratio is attempted instead.  Returns
        NULL_TREE if neither form can be used.  */
4190
4191 static tree
4192 get_debug_computation_at (class loop *loop, gimple *at,
4193                           struct iv_use *use, struct iv_cand *cand)
4194 {
4195   if (tree ret = get_computation_at (loop, at, use, cand))
4196     return ret;
4197
4198   tree ubase = use->iv->base, ustep = use->iv->step;
4199   tree cbase = cand->iv->base, cstep = cand->iv->step;
4200   tree var;
4201   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4202   widest_int rat;
4203
4204   /* We must have a precision to express the values of use.  */
4205   if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4206     return NULL_TREE;
4207
4208   /* Try to handle the case that get_computation_at doesn't,
4209      try to express
4210      use = ubase + (var - cbase) / ratio.  */
4211   if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4212                              &rat))
4213     return NULL_TREE;
4214
       /* Work with the magnitude of the ratio and remember its sign; the
          sign is applied when the result is combined with UBASE at the
          end.  A negative ratio is not handled for unsigned CTYPE.  */
4215   bool neg_p = false;
4216   if (wi::neg_p (rat))
4217     {
4218       if (TYPE_UNSIGNED (ctype))
4219         return NULL_TREE;
4220       neg_p = true;
4221       rat = wi::neg (rat);
4222     }
4223
4224   /* If both IVs can wrap around and CAND doesn't have a power of two step,
4225      it is unsafe.  Consider uint16_t CAND with step 9, when wrapping around,
4226      the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4227      uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4228      ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59.  */
4229   if (!use->iv->no_overflow
4230       && !cand->iv->no_overflow
4231       && !integer_pow2p (cstep))
4232     return NULL_TREE;
4233
       /* BITS is the exact log2 of RAT when there is one (exact_log2 yields
          -1 otherwise), else floor_log2 (RAT) + 1.  If CAND may overflow,
          CTYPE must have at least that many bits beyond the precision of
          UTYPE.  */
4234   int bits = wi::exact_log2 (rat);
4235   if (bits == -1)
4236     bits = wi::floor_log2 (rat) + 1;
4237   if (!cand->iv->no_overflow
4238       && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4239     return NULL_TREE;
4240
4241   var = var_at_stmt (loop, cand, at);
4242
       /* For a pointer candidate do the subtraction and division in the
          corresponding unsigned integer type.  */
4243   if (POINTER_TYPE_P (ctype))
4244     {
4245       ctype = unsigned_type_for (ctype);
4246       cbase = fold_convert (ctype, cbase);
4247       cstep = fold_convert (ctype, cstep);
4248       var = fold_convert (ctype, var);
4249     }
4250
       /* After the increment VAR is one step ahead; take the step off.  */
4251   if (stmt_after_increment (loop, cand, at))
4252     var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4253                        unshare_expr (cstep));
4254
       /* (var - cbase) / ratio.  */
4255   var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4256   var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4257                      wide_int_to_tree (TREE_TYPE (var), rat));
       /* Combine with UBASE, as a sizetype offset for a pointer use and in
          UTYPE otherwise, negating or subtracting when the ratio was
          negative.  */
4258   if (POINTER_TYPE_P (utype))
4259     {
4260       var = fold_convert (sizetype, var);
4261       if (neg_p)
4262         var = fold_build1 (NEGATE_EXPR, sizetype, var);
4263       var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4264     }
4265   else
4266     {
4267       var = fold_convert (utype, var);
4268       var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4269                          ubase, var);
4270     }
4271   return var;
4272 }
4273
4274 /* Adjust the cost COST for being in loop setup rather than loop body.
4275    If we're optimizing for space, the loop setup overhead is constant;
4276    if we're optimizing for speed, amortize it over the per-iteration cost.
4277    If ROUND_UP_P is true, the result is round up rather than to zero when
4278    optimizing for speed.  */
4279 static int64_t
4280 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4281                    bool round_up_p = false)
4282 {
4283   if (cost == INFTY)
4284     return cost;
4285   else if (optimize_loop_for_speed_p (data->current_loop))
4286     {
4287       int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4288       return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4289     }
4290   else
4291     return cost;
4292 }
4293
4294 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4295    EXPR operand holding the shift.  COST0 and COST1 are the costs for
4296    calculating the operands of EXPR.  Returns true if successful, and returns
4297    the cost in COST.  */
4298
4299 static bool
4300 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4301                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4302 {
4303   comp_cost res;
4304   tree op1 = TREE_OPERAND (expr, 1);
4305   tree cst = TREE_OPERAND (mult, 1);
4306   tree multop = TREE_OPERAND (mult, 0);
4307   int m = exact_log2 (int_cst_value (cst));
4308   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4309   int as_cost, sa_cost;
4310   bool mult_in_op1;
4311
4312   if (!(m >= 0 && m < maxm))
4313     return false;
4314
4315   STRIP_NOPS (op1);
4316   mult_in_op1 = operand_equal_p (op1, mult, 0);
4317
4318   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4319
4320   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4321      use that in preference to a shift insn followed by an add insn.  */
4322   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4323              ? shiftadd_cost (speed, mode, m)
4324              : (mult_in_op1
4325                 ? shiftsub1_cost (speed, mode, m)
4326                 : shiftsub0_cost (speed, mode, m)));
4327
4328   res = comp_cost (MIN (as_cost, sa_cost), 0);
4329   res += (mult_in_op1 ? cost0 : cost1);
4330
4331   STRIP_NOPS (multop);
4332   if (!is_gimple_val (multop))
4333     res += force_expr_to_var_cost (multop, speed);
4334
4335   *cost = res;
4336   return true;
4337 }
4338
4339 /* Estimates cost of forcing expression EXPR into a variable.  SPEED selects
        which of the two cost tables is used; in the dump below index 0 is
        labelled "size" and index 1 "speed".

        SSA variables are free, invariants are charged one of three costs
        measured once on first use, and the handled unary, binary, comparison
        and COND_EXPR codes are charged the cost of the operation plus the
        recursively computed cost of their operands.  Any other code is
        charged target_spill_cost[SPEED].  */
4340
4341 static comp_cost
4342 force_expr_to_var_cost (tree expr, bool speed)
4343 {
       /* Costs of an integer constant, the address of a symbol and a
          symbol plus offset, measured on the first call and kept for all
          later calls.  Note that the local array address_cost hides the
          function of the same name inside this function.  */
4344   static bool costs_initialized = false;
4345   static unsigned integer_cost [2];
4346   static unsigned symbol_cost [2];
4347   static unsigned address_cost [2];
4348   tree op0, op1;
4349   comp_cost cost0, cost1, cost;
4350   machine_mode mode;
4351   scalar_int_mode int_mode;
4352
4353   if (!costs_initialized)
4354     {
4355       tree type = build_pointer_type (integer_type_node);
4356       tree var, addr;
4357       rtx x;
4358       int i;
4359
           /* Build a static test variable; being static it gets
              symbol-based memory RTL and needs no register counter.  */
4360       var = create_tmp_var_raw (integer_type_node, "test_var");
4361       TREE_STATIC (var) = 1;
4362       x = produce_memory_decl_rtl (var, NULL);
4363       SET_DECL_RTL (var, x);
4364
4365       addr = build1 (ADDR_EXPR, type, var);
4366
4367
4368       for (i = 0; i < 2; i++)
4369         {
4370           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4371                                                              2000), i);
4372
4373           symbol_cost[i] = computation_cost (addr, i) + 1;
4374
4375           address_cost[i]
4376             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4377           if (dump_file && (dump_flags & TDF_DETAILS))
4378             {
4379               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4380               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4381               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4382               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4383               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4384               fprintf (dump_file, "\n");
4385             }
4386         }
4387
4388       costs_initialized = true;
4389     }
4390
4391   STRIP_NOPS (expr);
4392
4393   if (SSA_VAR_P (expr))
4394     return no_cost;
4395
       /* Invariants: integer constants, addresses of plain declarations,
          and every other invariant.  */
4396   if (is_gimple_min_invariant (expr))
4397     {
4398       if (poly_int_tree_p (expr))
4399         return comp_cost (integer_cost [speed], 0);
4400
4401       if (TREE_CODE (expr) == ADDR_EXPR)
4402         {
4403           tree obj = TREE_OPERAND (expr, 0);
4404
4405           if (VAR_P (obj)
4406               || TREE_CODE (obj) == PARM_DECL
4407               || TREE_CODE (obj) == RESULT_DECL)
4408             return comp_cost (symbol_cost [speed], 0);
4409         }
4410
4411       return comp_cost (address_cost [speed], 0);
4412     }
4413
       /* First switch: pick the operands whose cost is to be added, with
          conversions that are no-ops stripped.  */
4414   switch (TREE_CODE (expr))
4415     {
4416     case POINTER_PLUS_EXPR:
4417     case PLUS_EXPR:
4418     case MINUS_EXPR:
4419     case MULT_EXPR:
4420     case TRUNC_DIV_EXPR:
4421     case BIT_AND_EXPR:
4422     case BIT_IOR_EXPR:
4423     case LSHIFT_EXPR:
4424     case RSHIFT_EXPR:
4425       op0 = TREE_OPERAND (expr, 0);
4426       op1 = TREE_OPERAND (expr, 1);
4427       STRIP_NOPS (op0);
4428       STRIP_NOPS (op1);
4429       break;
4430
4431     CASE_CONVERT:
4432     case NEGATE_EXPR:
4433     case BIT_NOT_EXPR:
4434       op0 = TREE_OPERAND (expr, 0);
4435       STRIP_NOPS (op0);
4436       op1 = NULL_TREE;
4437       break;
4438     /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4439        introduce COND_EXPR for IV base, need to support better cost estimation
4440        for this COND_EXPR and tcc_comparison.  */
4441     case COND_EXPR:
           /* The two arms are handled here as OP0 and OP1; the condition
              (operand 0) is costed in the second switch below.  */
4442       op0 = TREE_OPERAND (expr, 1);
4443       STRIP_NOPS (op0);
4444       op1 = TREE_OPERAND (expr, 2);
4445       STRIP_NOPS (op1);
4446       break;
4447     case LT_EXPR:
4448     case LE_EXPR:
4449     case GT_EXPR:
4450     case GE_EXPR:
4451     case EQ_EXPR:
4452     case NE_EXPR:
4453     case UNORDERED_EXPR:
4454     case ORDERED_EXPR:
4455     case UNLT_EXPR:
4456     case UNLE_EXPR:
4457     case UNGT_EXPR:
4458     case UNGE_EXPR:
4459     case UNEQ_EXPR:
4460     case LTGT_EXPR:
4461     case MAX_EXPR:
4462     case MIN_EXPR:
4463       op0 = TREE_OPERAND (expr, 0);
4464       STRIP_NOPS (op0);
4465       op1 = TREE_OPERAND (expr, 1);
4466       STRIP_NOPS (op1);
4467       break;
4468
4469     default:
4470       /* Just an arbitrary value, FIXME.  */
4471       return comp_cost (target_spill_cost[speed], 0);
4472     }
4473
       /* Missing operands, SSA names and constants cost nothing; any other
          operand is costed recursively.  */
4474   if (op0 == NULL_TREE
4475       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4476     cost0 = no_cost;
4477   else
4478     cost0 = force_expr_to_var_cost (op0, speed);
4479
4480   if (op1 == NULL_TREE
4481       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4482     cost1 = no_cost;
4483   else
4484     cost1 = force_expr_to_var_cost (op1, speed);
4485
       /* Second switch: the cost of the operation itself.  Every code that
          got past the first switch has a case here, hence the
          gcc_unreachable in the default.  */
4486   mode = TYPE_MODE (TREE_TYPE (expr));
4487   switch (TREE_CODE (expr))
4488     {
4489     case POINTER_PLUS_EXPR:
4490     case PLUS_EXPR:
4491     case MINUS_EXPR:
4492     case NEGATE_EXPR:
4493       cost = comp_cost (add_cost (speed, mode), 0);
4494       if (TREE_CODE (expr) != NEGATE_EXPR)
4495         {
               /* If one operand is a multiplication by a suitable constant,
                  try to cost the whole expression as a shift-and-add; that
                  cost already includes the operands, so it is returned
                  directly.  */
4496           tree mult = NULL_TREE;
4497           comp_cost sa_cost;
4498           if (TREE_CODE (op1) == MULT_EXPR)
4499             mult = op1;
4500           else if (TREE_CODE (op0) == MULT_EXPR)
4501             mult = op0;
4502
4503           if (mult != NULL_TREE
4504               && is_a <scalar_int_mode> (mode, &int_mode)
4505               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4506               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4507                                     speed, &sa_cost))
4508             return sa_cost;
4509         }
4510       break;
4511
4512     CASE_CONVERT:
4513       {
             /* Despite their names these two hold types; the modes are
                taken from them in the call.  */
4514         tree inner_mode, outer_mode;
4515         outer_mode = TREE_TYPE (expr);
4516         inner_mode = TREE_TYPE (op0);
4517         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4518                                        TYPE_MODE (inner_mode), speed), 0);
4519       }
4520       break;
4521
4522     case MULT_EXPR:
           /* Only multiplication by a constant is costed precisely; note
              that the other cases return without adding operand costs.  */
4523       if (cst_and_fits_in_hwi (op0))
4524         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4525                                              mode, speed), 0);
4526       else if (cst_and_fits_in_hwi (op1))
4527         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4528                                              mode, speed), 0);
4529       else
4530         return comp_cost (target_spill_cost [speed], 0);
4531       break;
4532
4533     case TRUNC_DIV_EXPR:
4534       /* Division by power of two is usually cheap, so we allow it.  Forbid
4535          anything else.  */
4536       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4537         cost = comp_cost (add_cost (speed, mode), 0);
4538       else
4539         cost = comp_cost (target_spill_cost[speed], 0);
4540       break;
4541
4542     case BIT_AND_EXPR:
4543     case BIT_IOR_EXPR:
4544     case BIT_NOT_EXPR:
4545     case LSHIFT_EXPR:
4546     case RSHIFT_EXPR:
4547       cost = comp_cost (add_cost (speed, mode), 0);
4548       break;
4549     case COND_EXPR:
           /* The cost of the COND_EXPR itself is the cost of its condition;
              the costs of the two arms are added after the switch.  */
4550       op0 = TREE_OPERAND (expr, 0);
4551       STRIP_NOPS (op0);
4552       if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4553           || CONSTANT_CLASS_P (op0))
4554         cost = no_cost;
4555       else
4556         cost = force_expr_to_var_cost (op0, speed);
4557       break;
4558     case LT_EXPR:
4559     case LE_EXPR:
4560     case GT_EXPR:
4561     case GE_EXPR:
4562     case EQ_EXPR:
4563     case NE_EXPR:
4564     case UNORDERED_EXPR:
4565     case ORDERED_EXPR:
4566     case UNLT_EXPR:
4567     case UNLE_EXPR:
4568     case UNGT_EXPR:
4569     case UNGE_EXPR:
4570     case UNEQ_EXPR:
4571     case LTGT_EXPR:
4572     case MAX_EXPR:
4573     case MIN_EXPR:
4574       /* Simply use add cost for now, FIXME if there is some more accurate cost
4575          evaluation way.  */
4576       cost = comp_cost (add_cost (speed, mode), 0);
4577       break;
4578
4579     default:
4580       gcc_unreachable ();
4581     }
4582
4583   cost += cost0;
4584   cost += cost1;
4585   return cost;
4586 }
4587
4588 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4589    invariants the computation depends on.  */
4590
4591 static comp_cost
4592 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4593 {
4594   if (!expr)
4595     return no_cost;
4596
4597   find_inv_vars (data, &expr, inv_vars);
4598   return force_expr_to_var_cost (expr, data->speed);
4599 }
4600
4601 /* Returns cost of auto-modifying address expression in shape base + offset.
4602    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4603    address expression.  The address expression has ADDR_MODE in addr space
4604    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4605    speed or size.
        (This paragraph describes get_address_cost_ainc, which is defined
        after the two helper types below.)  */
4606
     /* Kinds of auto-modifying address.  The enumerators are also used as
        indices into ainc_cost_data::costs.  */
4607 enum ainc_type
4608 {
4609   AINC_PRE_INC,         /* Pre increment.  */
4610   AINC_PRE_DEC,         /* Pre decrement.  */
4611   AINC_POST_INC,        /* Post increment.  */
4612   AINC_POST_DEC,        /* Post decrement.  */
4613   AINC_NONE             /* Also the number of auto increment types.  */
4614 };
4615
     /* Cost of each kind of auto-modifying address, indexed by ainc_type.
        get_address_cost_ainc keeps one such table per combination of address
        space and memory mode.  */
4616 struct ainc_cost_data
4617 {
4618   int64_t costs[AINC_NONE];
4619 };
4620
     /* Returns the cost of an auto-modifying address for a MEM_MODE access in
        address space AS, where the address IV has step AINC_STEP and the
        address expression has offset AINC_OFFSET.  ADDR_MODE is the mode of
        the address and SPEED is passed on to address_cost.

        The result is infinite_cost if MEM_MODE has none of the auto
        increment/decrement forms enabled, or if AINC_STEP and AINC_OFFSET do
        not match one of the four forms for the size of MEM_MODE.  The result
        may also carry an INFTY cost when the matching form is not a valid
        address.

        NOTE(review): the per-form costs are computed once per (AS, MEM_MODE)
        pair and cached in a function-static vector, but they are computed
        with the ADDR_MODE and SPEED of the first call for that pair; later
        calls with a different SPEED reuse them.  Confirm this is intended.  */
4621 static comp_cost
4622 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4623                        machine_mode addr_mode, machine_mode mem_mode,
4624                        addr_space_t as, bool speed)
4625 {
4626   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4627       && !USE_STORE_PRE_DECREMENT (mem_mode)
4628       && !USE_LOAD_POST_DECREMENT (mem_mode)
4629       && !USE_STORE_POST_DECREMENT (mem_mode)
4630       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4631       && !USE_STORE_PRE_INCREMENT (mem_mode)
4632       && !USE_LOAD_POST_INCREMENT (mem_mode)
4633       && !USE_STORE_POST_INCREMENT (mem_mode))
4634     return infinite_cost;
4635
       /* Cache of cost tables, indexed by address space and memory mode.  It
          is grown on demand to cover all modes of the highest address space
          seen so far; new slots start out null.  */
4636   static vec<ainc_cost_data *> ainc_cost_data_list;
4637   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4638   if (idx >= ainc_cost_data_list.length ())
4639     {
4640       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4641
4642       gcc_assert (nsize > idx);
4643       ainc_cost_data_list.safe_grow_cleared (nsize, true);
4644     }
4645
4646   ainc_cost_data *data = ainc_cost_data_list[idx];
4647   if (data == NULL)
4648     {
           /* First request for this slot: cost each enabled form on a
              scratch register.  A form keeps the INFTY cost set below unless
              it is enabled for MEM_MODE and is a valid address in AS.  */
4649       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4650
4651       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4652       data->costs[AINC_PRE_DEC] = INFTY;
4653       data->costs[AINC_POST_DEC] = INFTY;
4654       data->costs[AINC_PRE_INC] = INFTY;
4655       data->costs[AINC_POST_INC] = INFTY;
4656       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4657           || USE_STORE_PRE_DECREMENT (mem_mode))
4658         {
4659           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4660
4661           if (memory_address_addr_space_p (mem_mode, addr, as))
4662             data->costs[AINC_PRE_DEC]
4663               = address_cost (addr, mem_mode, as, speed);
4664         }
4665       if (USE_LOAD_POST_DECREMENT (mem_mode)
4666           || USE_STORE_POST_DECREMENT (mem_mode))
4667         {
4668           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4669
4670           if (memory_address_addr_space_p (mem_mode, addr, as))
4671             data->costs[AINC_POST_DEC]
4672               = address_cost (addr, mem_mode, as, speed);
4673         }
4674       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4675           || USE_STORE_PRE_INCREMENT (mem_mode))
4676         {
4677           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4678
4679           if (memory_address_addr_space_p (mem_mode, addr, as))
4680             data->costs[AINC_PRE_INC]
4681               = address_cost (addr, mem_mode, as, speed);
4682         }
4683       if (USE_LOAD_POST_INCREMENT (mem_mode)
4684           || USE_STORE_POST_INCREMENT (mem_mode))
4685         {
4686           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4687
4688           if (memory_address_addr_space_p (mem_mode, addr, as))
4689             data->costs[AINC_POST_INC]
4690               = address_cost (addr, mem_mode, as, speed);
4691         }
4692       ainc_cost_data_list[idx] = data;
4693     }
4694
       /* Select the form from step and offset: the step must be plus or
          minus the access size; offset 0 selects the post forms, and an
          offset equal to the step selects the pre forms.  */
4695   poly_int64 msize = GET_MODE_SIZE (mem_mode);
4696   if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4697     return comp_cost (data->costs[AINC_POST_INC], 0);
4698   if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4699     return comp_cost (data->costs[AINC_POST_DEC], 0);
4700   if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4701     return comp_cost (data->costs[AINC_PRE_INC], 0);
4702   if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4703     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4704
4705   return infinite_cost;
4706 }
4707
4708 /* Return cost of computing USE's address expression by using CAND.
4709    AFF_INV and AFF_VAR represent invariant and variant parts of the
4710    address expression, respectively.  If AFF_INV is simple, store
4711    the loop invariant variables which are depended by it in INV_VARS;
4712    if AFF_INV is complicated, handle it as a new invariant expression
4713    and record it in INV_EXPR.  RATIO indicates multiple times between
4714    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4715    value to it indicating if this is an auto-increment address.  */
4716
4717 static comp_cost
4718 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4719                   struct iv_cand *cand, aff_tree *aff_inv,
4720                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4721                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4722                   bool *can_autoinc, bool speed)
4723 {
4724   rtx addr;
4725   bool simple_inv = true;
4726   tree comp_inv = NULL_TREE, type = aff_var->type;
4727   comp_cost var_cost = no_cost, cost = no_cost;
4728   struct mem_address parts = {NULL_TREE, integer_one_node,
4729                               NULL_TREE, NULL_TREE, NULL_TREE};
4730   machine_mode addr_mode = TYPE_MODE (type);
4731   machine_mode mem_mode = TYPE_MODE (use->mem_type);
4732   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4733   /* Only true if ratio != 1.  */
4734   bool ok_with_ratio_p = false;
4735   bool ok_without_ratio_p = false;
4736   code_helper code = ERROR_MARK;
4737
4738   if (use->type == USE_PTR_ADDRESS)
4739     {
4740       gcall *call = as_a<gcall *> (use->stmt);
4741       gcc_assert (gimple_call_internal_p (call));
4742       code = gimple_call_internal_fn (call);
4743     }
4744
4745   if (!aff_combination_const_p (aff_inv))
4746     {
4747       parts.index = integer_one_node;
4748       /* Addressing mode "base + index".  */
4749       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4750       if (ratio != 1)
4751         {
4752           parts.step = wide_int_to_tree (type, ratio);
4753           /* Addressing mode "base + index << scale".  */
4754           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4755           if (!ok_with_ratio_p)
4756             parts.step = NULL_TREE;
4757         }
4758       if (ok_with_ratio_p || ok_without_ratio_p)
4759         {
4760           if (maybe_ne (aff_inv->offset, 0))
4761             {
4762               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4763               /* Addressing mode "base + index [<< scale] + offset".  */
4764               if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4765                 parts.offset = NULL_TREE;
4766               else
4767                 aff_inv->offset = 0;
4768             }
4769
4770           move_fixed_address_to_symbol (&parts, aff_inv);
4771           /* Base is fixed address and is moved to symbol part.  */
4772           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4773             parts.base = NULL_TREE;
4774
4775           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4776           if (parts.symbol != NULL_TREE
4777               && !valid_mem_ref_p (mem_mode, as, &parts, code))
4778             {
4779               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4780               parts.symbol = NULL_TREE;
4781               /* Reset SIMPLE_INV since symbol address needs to be computed
4782                  outside of address expression in this case.  */
4783               simple_inv = false;
4784               /* Symbol part is moved back to base part, it can't be NULL.  */
4785               parts.base = integer_one_node;
4786             }
4787         }
4788       else
4789         parts.index = NULL_TREE;
4790     }
4791   else
4792     {
4793       poly_int64 ainc_step;
4794       if (can_autoinc
4795           && ratio == 1
4796           && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4797         {
4798           poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4799
4800           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4801             ainc_offset += ainc_step;
4802           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4803                                         addr_mode, mem_mode, as, speed);
4804           if (!cost.infinite_cost_p ())
4805             {
4806               *can_autoinc = true;
4807               return cost;
4808             }
4809           cost = no_cost;
4810         }
4811       if (!aff_combination_zero_p (aff_inv))
4812         {
4813           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4814           /* Addressing mode "base + offset".  */
4815           if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4816             parts.offset = NULL_TREE;
4817           else
4818             aff_inv->offset = 0;
4819         }
4820     }
4821
4822   if (simple_inv)
4823     simple_inv = (aff_inv == NULL
4824                   || aff_combination_const_p (aff_inv)
4825                   || aff_combination_singleton_var_p (aff_inv));
4826   if (!aff_combination_zero_p (aff_inv))
4827     comp_inv = aff_combination_to_tree (aff_inv);
4828   if (comp_inv != NULL_TREE)
4829     cost = force_var_cost (data, comp_inv, inv_vars);
4830   if (ratio != 1 && parts.step == NULL_TREE)
4831     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4832   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4833     var_cost += add_cost (speed, addr_mode);
4834
4835   if (comp_inv && inv_expr && !simple_inv)
4836     {
4837       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4838       /* Clear depends on.  */
4839       if (*inv_expr != NULL && inv_vars && *inv_vars)
4840         bitmap_clear (*inv_vars);
4841
4842       /* Cost of small invariant expression adjusted against loop niters
4843          is usually zero, which makes it difficult to be differentiated
4844          from candidate based on loop invariant variables.  Secondly, the
4845          generated invariant expression may not be hoisted out of loop by
4846          following pass.  We penalize the cost by rounding up in order to
4847          neutralize such effects.  */
4848       cost.cost = adjust_setup_cost (data, cost.cost, true);
4849       cost.scratch = cost.cost;
4850     }
4851
4852   cost += var_cost;
4853   addr = addr_for_mem_ref (&parts, as, false);
4854   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4855   cost += address_cost (addr, mem_mode, as, speed);
4856
4857   if (parts.symbol != NULL_TREE)
4858     cost.complexity += 1;
4859   /* Don't increase the complexity of adding a scaled index if it's
4860      the only kind of index that the target allows.  */
4861   if (parts.step != NULL_TREE && ok_without_ratio_p)
4862     cost.complexity += 1;
4863   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4864     cost.complexity += 1;
4865   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4866     cost.complexity += 1;
4867
4868   return cost;
4869 }
4870
4871 /* Scale (multiply) the computed COST (except scratch part that should be
4872    hoisted out a loop) by header->frequency / AT->frequency, which makes
4873    expected cost more accurate.  */
4874
4875 static comp_cost
4876 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4877 {
4878   if (data->speed
4879       && data->current_loop->header->count.to_frequency (cfun) > 0)
4880     {
4881       basic_block bb = gimple_bb (at);
4882       gcc_assert (cost.scratch <= cost.cost);
4883       int scale_factor = (int)(intptr_t) bb->aux;
4884       if (scale_factor == 1)
4885         return cost;
4886
4887       int64_t scaled_cost
4888         = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4889
4890       if (dump_file && (dump_flags & TDF_DETAILS))
4891         fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4892                  "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4893                  1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4894
4895       cost.cost = scaled_cost;
4896     }
4897
4898   return cost;
4899 }
4900
4901 /* Determines the cost of the computation by that USE is expressed
4902    from induction variable CAND.  If ADDRESS_P is true, we just need
4903    to create an address from it, otherwise we want to get it into
4904    register.  A set of invariants we depend on is stored in INV_VARS.
4905    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4906    addressing is likely.  If INV_EXPR is nonnull, record invariant
4907    expr entry in it.  */
4908
4909 static comp_cost
4910 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4911                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4912                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4913 {
4914   gimple *at = use->stmt;
4915   tree ubase = use->iv->base, cbase = cand->iv->base;
4916   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4917   tree comp_inv = NULL_TREE;
4918   HOST_WIDE_INT ratio, aratio;
4919   comp_cost cost;
4920   widest_int rat;
4921   aff_tree aff_inv, aff_var;
4922   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4923
4924   if (inv_vars)
4925     *inv_vars = NULL;
4926   if (can_autoinc)
4927     *can_autoinc = false;
4928   if (inv_expr)
4929     *inv_expr = NULL;
4930
4931   /* Check if we have enough precision to express the values of use.  */
4932   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4933     return infinite_cost;
4934
4935   if (address_p
4936       || (use->iv->base_object
4937           && cand->iv->base_object
4938           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4939           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4940     {
4941       /* Do not try to express address of an object with computation based
4942          on address of a different object.  This may cause problems in rtl
4943          level alias analysis (that does not expect this to be happening,
4944          as this is illegal in C), and would be unlikely to be useful
4945          anyway.  */
4946       if (use->iv->base_object
4947           && cand->iv->base_object
4948           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4949         return infinite_cost;
4950     }
4951
4952   if (!get_computation_aff_1 (data->current_loop, at, use,
4953                               cand, &aff_inv, &aff_var, &rat)
4954       || !wi::fits_shwi_p (rat))
4955     return infinite_cost;
4956
4957   ratio = rat.to_shwi ();
4958   if (address_p)
4959     {
4960       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4961                                inv_vars, inv_expr, can_autoinc, speed);
4962       cost = get_scaled_computation_cost_at (data, at, cost);
4963       /* For doloop IV cand, add on the extra cost.  */
4964       cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4965       return cost;
4966     }
4967
4968   bool simple_inv = (aff_combination_const_p (&aff_inv)
4969                      || aff_combination_singleton_var_p (&aff_inv));
4970   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4971   aff_combination_convert (&aff_inv, signed_type);
4972   if (!aff_combination_zero_p (&aff_inv))
4973     comp_inv = aff_combination_to_tree (&aff_inv);
4974
4975   cost = force_var_cost (data, comp_inv, inv_vars);
4976   if (comp_inv && inv_expr && !simple_inv)
4977     {
4978       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4979       /* Clear depends on.  */
4980       if (*inv_expr != NULL && inv_vars && *inv_vars)
4981         bitmap_clear (*inv_vars);
4982
4983       cost.cost = adjust_setup_cost (data, cost.cost);
4984       /* Record setup cost in scratch field.  */
4985       cost.scratch = cost.cost;
4986     }
4987   /* Cost of constant integer can be covered when adding invariant part to
4988      variant part.  */
4989   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4990     cost = no_cost;
4991
4992   /* Need type narrowing to represent use with cand.  */
4993   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4994     {
4995       machine_mode outer_mode = TYPE_MODE (utype);
4996       machine_mode inner_mode = TYPE_MODE (ctype);
4997       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4998     }
4999
5000   /* Turn a + i * (-c) into a - i * c.  */
5001   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
5002     aratio = -ratio;
5003   else
5004     aratio = ratio;
5005
5006   if (ratio != 1)
5007     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
5008
5009   /* TODO: We may also need to check if we can compute  a + i * 4 in one
5010      instruction.  */
5011   /* Need to add up the invariant and variant parts.  */
5012   if (comp_inv && !integer_zerop (comp_inv))
5013     cost += add_cost (speed, TYPE_MODE (utype));
5014
5015   cost = get_scaled_computation_cost_at (data, at, cost);
5016
5017   /* For doloop IV cand, add on the extra cost.  */
5018   if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
5019     cost += targetm.doloop_cost_for_generic;
5020
5021   return cost;
5022 }
5023
5024 /* Determines cost of computing the use in GROUP with CAND in a generic
5025    expression.  */
5026
5027 static bool
5028 determine_group_iv_cost_generic (struct ivopts_data *data,
5029                                  struct iv_group *group, struct iv_cand *cand)
5030 {
5031   comp_cost cost;
5032   iv_inv_expr_ent *inv_expr = NULL;
5033   bitmap inv_vars = NULL, inv_exprs = NULL;
5034   struct iv_use *use = group->vuses[0];
5035
5036   /* The simple case first -- if we need to express value of the preserved
5037      original biv, the cost is 0.  This also prevents us from counting the
5038      cost of increment twice -- once at this use and once in the cost of
5039      the candidate.  */
5040   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5041     cost = no_cost;
5042   /* If the IV candidate involves undefined SSA values and is not the
5043      same IV as on the USE avoid using that candidate here.  */
5044   else if (cand->involves_undefs
5045            && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
5046     return false;
5047   else
5048     cost = get_computation_cost (data, use, cand, false,
5049                                  &inv_vars, NULL, &inv_expr);
5050
5051   if (inv_expr)
5052     {
5053       inv_exprs = BITMAP_ALLOC (NULL);
5054       bitmap_set_bit (inv_exprs, inv_expr->id);
5055     }
5056   set_group_iv_cost (data, group, cand, cost, inv_vars,
5057                      NULL_TREE, ERROR_MARK, inv_exprs);
5058   return !cost.infinite_cost_p ();
5059 }
5060
5061 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5062
5063 static bool
5064 determine_group_iv_cost_address (struct ivopts_data *data,
5065                                  struct iv_group *group, struct iv_cand *cand)
5066 {
5067   unsigned i;
5068   bitmap inv_vars = NULL, inv_exprs = NULL;
5069   bool can_autoinc;
5070   iv_inv_expr_ent *inv_expr = NULL;
5071   struct iv_use *use = group->vuses[0];
5072   comp_cost sum_cost = no_cost, cost;
5073
5074   cost = get_computation_cost (data, use, cand, true,
5075                                &inv_vars, &can_autoinc, &inv_expr);
5076
5077   if (inv_expr)
5078     {
5079       inv_exprs = BITMAP_ALLOC (NULL);
5080       bitmap_set_bit (inv_exprs, inv_expr->id);
5081     }
5082   sum_cost = cost;
5083   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5084     {
5085       if (can_autoinc)
5086         sum_cost -= cand->cost_step;
5087       /* If we generated the candidate solely for exploiting autoincrement
5088          opportunities, and it turns out it can't be used, set the cost to
5089          infinity to make sure we ignore it.  */
5090       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5091         sum_cost = infinite_cost;
5092     }
5093
5094   /* Uses in a group can share setup code, so only add setup cost once.  */
5095   cost -= cost.scratch;
5096   /* Compute and add costs for rest uses of this group.  */
5097   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5098     {
5099       struct iv_use *next = group->vuses[i];
5100
5101       /* TODO: We could skip computing cost for sub iv_use when it has the
5102          same cost as the first iv_use, but the cost really depends on the
5103          offset and where the iv_use is.  */
5104         cost = get_computation_cost (data, next, cand, true,
5105                                      NULL, &can_autoinc, &inv_expr);
5106         if (inv_expr)
5107           {
5108             if (!inv_exprs)
5109               inv_exprs = BITMAP_ALLOC (NULL);
5110
5111             bitmap_set_bit (inv_exprs, inv_expr->id);
5112           }
5113       sum_cost += cost;
5114     }
5115   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5116                      NULL_TREE, ERROR_MARK, inv_exprs);
5117
5118   return !sum_cost.infinite_cost_p ();
5119 }
5120
5121 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5122    and stores it to VAL.  */
5123
5124 static void
5125 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5126                class tree_niter_desc *desc, aff_tree *val)
5127 {
5128   aff_tree step, delta, nit;
5129   struct iv *iv = cand->iv;
5130   tree type = TREE_TYPE (iv->base);
5131   tree niter = desc->niter;
5132   bool after_adjust = stmt_after_increment (loop, cand, at);
5133   tree steptype;
5134
5135   if (POINTER_TYPE_P (type))
5136     steptype = sizetype;
5137   else
5138     steptype = unsigned_type_for (type);
5139
5140   /* If AFTER_ADJUST is required, the code below generates the equivalent
5141      of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5142      BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5143      SSA_NAME - 1.  Unfortunately, guaranteeing that adding 1 to NITER
5144      doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5145      class for common idioms that we know are safe.  */
5146   if (after_adjust
5147       && desc->control.no_overflow
5148       && integer_onep (desc->control.step)
5149       && (desc->cmp == LT_EXPR
5150           || desc->cmp == NE_EXPR)
5151       && TREE_CODE (desc->bound) == SSA_NAME)
5152     {
5153       if (integer_onep (desc->control.base))
5154         {
5155           niter = desc->bound;
5156           after_adjust = false;
5157         }
5158       else if (TREE_CODE (niter) == MINUS_EXPR
5159                && integer_onep (TREE_OPERAND (niter, 1)))
5160         {
5161           niter = TREE_OPERAND (niter, 0);
5162           after_adjust = false;
5163         }
5164     }
5165
5166   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5167   aff_combination_convert (&step, steptype);
5168   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5169   aff_combination_convert (&nit, steptype);
5170   aff_combination_mult (&nit, &step, &delta);
5171   if (after_adjust)
5172     aff_combination_add (&delta, &step);
5173
5174   tree_to_aff_combination (iv->base, type, val);
5175   if (!POINTER_TYPE_P (type))
5176     aff_combination_convert (val, steptype);
5177   aff_combination_add (val, &delta);
5178 }
5179
5180 /* Returns period of induction variable iv.  */
5181
5182 static tree
5183 iv_period (struct iv *iv)
5184 {
5185   tree step = iv->step, period, type;
5186   tree pow2div;
5187
5188   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5189
5190   type = unsigned_type_for (TREE_TYPE (step));
5191   /* Period of the iv is lcm (step, type_range)/step -1,
5192      i.e., N*type_range/step - 1. Since type range is power
5193      of two, N == (step >> num_of_ending_zeros_binary (step),
5194      so the final result is
5195
5196        (type_range >> num_of_ending_zeros_binary (step)) - 1
5197
5198   */
5199   pow2div = num_ending_zeros (step);
5200
5201   period = build_low_bits_mask (type,
5202                                 (TYPE_PRECISION (type)
5203                                  - tree_to_uhwi (pow2div)));
5204
5205   return period;
5206 }
5207
5208 /* Returns the comparison operator used when eliminating the iv USE.  */
5209
5210 static enum tree_code
5211 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5212 {
5213   class loop *loop = data->current_loop;
5214   basic_block ex_bb;
5215   edge exit;
5216
5217   ex_bb = gimple_bb (use->stmt);
5218   exit = EDGE_SUCC (ex_bb, 0);
5219   if (flow_bb_inside_loop_p (loop, exit->dest))
5220     exit = EDGE_SUCC (ex_bb, 1);
5221
5222   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5223 }
5224
5225 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5226    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5227    calculation is performed in non-wrapping type.
5228
5229    TODO: More generally, we could test for the situation that
5230          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5231          This would require knowing the sign of OFFSET.  */
5232
5233 static bool
5234 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5235 {
5236   enum tree_code code;
5237   tree e1, e2;
5238   aff_tree aff_e1, aff_e2, aff_offset;
5239
5240   if (!nowrap_type_p (TREE_TYPE (base)))
5241     return false;
5242
5243   base = expand_simple_operations (base);
5244
5245   if (TREE_CODE (base) == SSA_NAME)
5246     {
5247       gimple *stmt = SSA_NAME_DEF_STMT (base);
5248
5249       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5250         return false;
5251
5252       code = gimple_assign_rhs_code (stmt);
5253       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5254         return false;
5255
5256       e1 = gimple_assign_rhs1 (stmt);
5257       e2 = gimple_assign_rhs2 (stmt);
5258     }
5259   else
5260     {
5261       code = TREE_CODE (base);
5262       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5263         return false;
5264       e1 = TREE_OPERAND (base, 0);
5265       e2 = TREE_OPERAND (base, 1);
5266     }
5267
5268   /* Use affine expansion as deeper inspection to prove the equality.  */
5269   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5270                                   &aff_e2, &data->name_expansion_cache);
5271   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5272                                   &aff_offset, &data->name_expansion_cache);
5273   aff_combination_scale (&aff_offset, -1);
5274   switch (code)
5275     {
5276     case PLUS_EXPR:
5277       aff_combination_add (&aff_e2, &aff_offset);
5278       if (aff_combination_zero_p (&aff_e2))
5279         return true;
5280
5281       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5282                                       &aff_e1, &data->name_expansion_cache);
5283       aff_combination_add (&aff_e1, &aff_offset);
5284       return aff_combination_zero_p (&aff_e1);
5285
5286     case POINTER_PLUS_EXPR:
5287       aff_combination_add (&aff_e2, &aff_offset);
5288       return aff_combination_zero_p (&aff_e2);
5289
5290     default:
5291       return false;
5292     }
5293 }
5294
5295 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5296    comparison with CAND.  NITER describes the number of iterations of
5297    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5298
5299    We aim to handle the following situation:
5300
5301    sometype *base, *p;
5302    int a, b, i;
5303
5304    i = a;
5305    p = p_0 = base + a;
5306
5307    do
5308      {
5309        bla (*p);
5310        p++;
5311        i++;
5312      }
5313    while (i < b);
5314
5315    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5316    We aim to optimize this to
5317
5318    p = p_0 = base + a;
5319    do
5320      {
5321        bla (*p);
5322        p++;
5323      }
5324    while (p < p_0 - a + b);
5325
5326    This preserves the correctness, since the pointer arithmetics does not
5327    overflow.  More precisely:
5328
5329    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5330       overflow in computing it or the values of p.
5331    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5332       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5333
5334 static bool
5335 iv_elimination_compare_lt (struct ivopts_data *data,
5336                            struct iv_cand *cand, enum tree_code *comp_p,
5337                            class tree_niter_desc *niter)
5338 {
5339   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5340   class aff_tree nit, tmpa, tmpb;
5341   enum tree_code comp;
5342   HOST_WIDE_INT step;
5343
5344   /* We need to know that the candidate induction variable does not overflow.
5345      While more complex analysis may be used to prove this, for now just
5346      check that the variable appears in the original program and that it
5347      is computed in a type that guarantees no overflows.  */
5348   cand_type = TREE_TYPE (cand->iv->base);
5349   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5350     return false;
5351
5352   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5353      the calculation of the BOUND could overflow, making the comparison
5354      invalid.  */
5355   if (!data->loop_single_exit_p)
5356     return false;
5357
5358   /* We need to be able to decide whether candidate is increasing or decreasing
5359      in order to choose the right comparison operator.  */
5360   if (!cst_and_fits_in_hwi (cand->iv->step))
5361     return false;
5362   step = int_cst_value (cand->iv->step);
5363
5364   /* Check that the number of iterations matches the expected pattern:
5365      a + 1 > b ? 0 : b - a - 1.  */
5366   mbz = niter->may_be_zero;
5367   if (TREE_CODE (mbz) == GT_EXPR)
5368     {
5369       /* Handle a + 1 > b.  */
5370       tree op0 = TREE_OPERAND (mbz, 0);
5371       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5372         {
5373           a = TREE_OPERAND (op0, 0);
5374           b = TREE_OPERAND (mbz, 1);
5375         }
5376       else
5377         return false;
5378     }
5379   else if (TREE_CODE (mbz) == LT_EXPR)
5380     {
5381       tree op1 = TREE_OPERAND (mbz, 1);
5382
5383       /* Handle b < a + 1.  */
5384       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5385         {
5386           a = TREE_OPERAND (op1, 0);
5387           b = TREE_OPERAND (mbz, 0);
5388         }
5389       else
5390         return false;
5391     }
5392   else
5393     return false;
5394
5395   /* Expected number of iterations is B - A - 1.  Check that it matches
5396      the actual number, i.e., that B - A - NITER = 1.  */
5397   tree_to_aff_combination (niter->niter, nit_type, &nit);
5398   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5399   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5400   aff_combination_scale (&nit, -1);
5401   aff_combination_scale (&tmpa, -1);
5402   aff_combination_add (&tmpb, &tmpa);
5403   aff_combination_add (&tmpb, &nit);
5404   if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5405     return false;
5406
5407   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5408      overflow.  */
5409   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5410                         cand->iv->step,
5411                         fold_convert (TREE_TYPE (cand->iv->step), a));
5412   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5413     return false;
5414
5415   /* Determine the new comparison operator.  */
5416   comp = step < 0 ? GT_EXPR : LT_EXPR;
5417   if (*comp_p == NE_EXPR)
5418     *comp_p = comp;
5419   else if (*comp_p == EQ_EXPR)
5420     *comp_p = invert_tree_comparison (comp, false);
5421   else
5422     gcc_unreachable ();
5423
5424   return true;
5425 }
5426
5427 /* Check whether it is possible to express the condition in USE by comparison
5428    of candidate CAND.  If so, store the value compared with to BOUND, and the
5429    comparison operator to COMP.  */
5430
5431 static bool
5432 may_eliminate_iv (struct ivopts_data *data,
5433                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5434                   enum tree_code *comp)
5435 {
5436   basic_block ex_bb;
5437   edge exit;
5438   tree period;
5439   class loop *loop = data->current_loop;
5440   aff_tree bnd;
5441   class tree_niter_desc *desc = NULL;
5442
5443   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5444     return false;
5445
5446   /* For now works only for exits that dominate the loop latch.
5447      TODO: extend to other conditions inside loop body.  */
5448   ex_bb = gimple_bb (use->stmt);
5449   if (use->stmt != last_nondebug_stmt (ex_bb)
5450       || gimple_code (use->stmt) != GIMPLE_COND
5451       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5452     return false;
5453
5454   exit = EDGE_SUCC (ex_bb, 0);
5455   if (flow_bb_inside_loop_p (loop, exit->dest))
5456     exit = EDGE_SUCC (ex_bb, 1);
5457   if (flow_bb_inside_loop_p (loop, exit->dest))
5458     return false;
5459
5460   desc = niter_for_exit (data, exit);
5461   if (!desc)
5462     return false;
5463
5464   /* Determine whether we can use the variable to test the exit condition.
5465      This is the case iff the period of the induction variable is greater
5466      than the number of iterations for which the exit condition is true.  */
5467   period = iv_period (cand->iv);
5468
5469   /* If the number of iterations is constant, compare against it directly.  */
5470   if (TREE_CODE (desc->niter) == INTEGER_CST)
5471     {
5472       /* See cand_value_at.  */
5473       if (stmt_after_increment (loop, cand, use->stmt))
5474         {
5475           if (!tree_int_cst_lt (desc->niter, period))
5476             return false;
5477         }
5478       else
5479         {
5480           if (tree_int_cst_lt (period, desc->niter))
5481             return false;
5482         }
5483     }
5484
5485   /* If not, and if this is the only possible exit of the loop, see whether
5486      we can get a conservative estimate on the number of iterations of the
5487      entire loop and compare against that instead.  */
5488   else
5489     {
5490       widest_int period_value, max_niter;
5491
5492       max_niter = desc->max;
5493       if (stmt_after_increment (loop, cand, use->stmt))
5494         max_niter += 1;
5495       period_value = wi::to_widest (period);
5496       if (wi::gtu_p (max_niter, period_value))
5497         {
5498           /* See if we can take advantage of inferred loop bound
5499              information.  */
5500           if (data->loop_single_exit_p)
5501             {
5502               if (!max_loop_iterations (loop, &max_niter))
5503                 return false;
5504               /* The loop bound is already adjusted by adding 1.  */
5505               if (wi::gtu_p (max_niter, period_value))
5506                 return false;
5507             }
5508           else
5509             return false;
5510         }
5511     }
5512
5513   /* For doloop IV cand, the bound would be zero.  It's safe whether
5514      may_be_zero set or not.  */
5515   if (cand->doloop_p)
5516     {
5517       *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5518       *comp = iv_elimination_compare (data, use);
5519       return true;
5520     }
5521
5522   cand_value_at (loop, cand, use->stmt, desc, &bnd);
5523
5524   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5525                          aff_combination_to_tree (&bnd));
5526   *comp = iv_elimination_compare (data, use);
5527
5528   /* It is unlikely that computing the number of iterations using division
5529      would be more profitable than keeping the original induction variable.  */
5530   if (expression_expensive_p (*bound))
5531     return false;
5532
5533   /* Sometimes, it is possible to handle the situation that the number of
5534      iterations may be zero unless additional assumptions by using <
5535      instead of != in the exit condition.
5536
5537      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5538            base the exit condition on it.  However, that is often too
5539            expensive.  */
5540   if (!integer_zerop (desc->may_be_zero))
5541     return iv_elimination_compare_lt (data, cand, comp, desc);
5542
5543   return true;
5544 }
5545
5546  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5547     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5548
5549 static int
5550 parm_decl_cost (struct ivopts_data *data, tree bound)
5551 {
5552   tree sbound = bound;
5553   STRIP_NOPS (sbound);
5554
5555   if (TREE_CODE (sbound) == SSA_NAME
5556       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5557       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5558       && data->body_includes_call)
5559     return COSTS_N_INSNS (1);
5560
5561   return 0;
5562 }
5563
5564 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5565
5566 static bool
5567 determine_group_iv_cost_cond (struct ivopts_data *data,
5568                               struct iv_group *group, struct iv_cand *cand)
5569 {
5570   tree bound = NULL_TREE;
5571   struct iv *cmp_iv;
5572   bitmap inv_exprs = NULL;
5573   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5574   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5575   enum comp_iv_rewrite rewrite_type;
5576   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5577   tree *control_var, *bound_cst;
5578   enum tree_code comp = ERROR_MARK;
5579   struct iv_use *use = group->vuses[0];
5580
5581   /* Extract condition operands.  */
5582   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5583                                         &bound_cst, NULL, &cmp_iv);
5584   gcc_assert (rewrite_type != COMP_IV_NA);
5585
5586   /* Try iv elimination.  */
5587   if (rewrite_type == COMP_IV_ELIM
5588       && may_eliminate_iv (data, use, cand, &bound, &comp))
5589     {
5590       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5591       if (elim_cost.cost == 0)
5592         elim_cost.cost = parm_decl_cost (data, bound);
5593       else if (TREE_CODE (bound) == INTEGER_CST)
5594         elim_cost.cost = 0;
5595       /* If we replace a loop condition 'i < n' with 'p < base + n',
5596          inv_vars_elim will have 'base' and 'n' set, which implies that both
5597          'base' and 'n' will be live during the loop.    More likely,
5598          'base + n' will be loop invariant, resulting in only one live value
5599          during the loop.  So in that case we clear inv_vars_elim and set
5600          inv_expr_elim instead.  */
5601       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5602         {
5603           inv_expr_elim = get_loop_invariant_expr (data, bound);
5604           bitmap_clear (inv_vars_elim);
5605         }
5606       /* The bound is a loop invariant, so it will be only computed
5607          once.  */
5608       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5609     }
5610
5611   /* When the condition is a comparison of the candidate IV against
5612      zero, prefer this IV.
5613
5614      TODO: The constant that we're subtracting from the cost should
5615      be target-dependent.  This information should be added to the
5616      target costs for each backend.  */
5617   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5618       && integer_zerop (*bound_cst)
5619       && (operand_equal_p (*control_var, cand->var_after, 0)
5620           || operand_equal_p (*control_var, cand->var_before, 0)))
5621     elim_cost -= 1;
5622
5623   express_cost = get_computation_cost (data, use, cand, false,
5624                                        &inv_vars_express, NULL,
5625                                        &inv_expr_express);
5626   if (cmp_iv != NULL)
5627     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5628
5629   /* Count the cost of the original bound as well.  */
5630   bound_cost = force_var_cost (data, *bound_cst, NULL);
5631   if (bound_cost.cost == 0)
5632     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5633   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5634     bound_cost.cost = 0;
5635   express_cost += bound_cost;
5636
5637   /* Choose the better approach, preferring the eliminated IV. */
5638   if (elim_cost <= express_cost)
5639     {
5640       cost = elim_cost;
5641       inv_vars = inv_vars_elim;
5642       inv_vars_elim = NULL;
5643       inv_expr = inv_expr_elim;
5644       /* For doloop candidate/use pair, adjust to zero cost.  */
5645       if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5646         cost = no_cost;
5647     }
5648   else
5649     {
5650       cost = express_cost;
5651       inv_vars = inv_vars_express;
5652       inv_vars_express = NULL;
5653       bound = NULL_TREE;
5654       comp = ERROR_MARK;
5655       inv_expr = inv_expr_express;
5656     }
5657
5658   if (inv_expr)
5659     {
5660       inv_exprs = BITMAP_ALLOC (NULL);
5661       bitmap_set_bit (inv_exprs, inv_expr->id);
5662     }
5663   set_group_iv_cost (data, group, cand, cost,
5664                      inv_vars, bound, comp, inv_exprs);
5665
5666   if (inv_vars_elim)
5667     BITMAP_FREE (inv_vars_elim);
5668   if (inv_vars_express)
5669     BITMAP_FREE (inv_vars_express);
5670
5671   return !cost.infinite_cost_p ();
5672 }
5673
5674 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5675    if USE cannot be represented with CAND.  */
5676
5677 static bool
5678 determine_group_iv_cost (struct ivopts_data *data,
5679                          struct iv_group *group, struct iv_cand *cand)
5680 {
5681   switch (group->type)
5682     {
5683     case USE_NONLINEAR_EXPR:
5684       return determine_group_iv_cost_generic (data, group, cand);
5685
5686     case USE_REF_ADDRESS:
5687     case USE_PTR_ADDRESS:
5688       return determine_group_iv_cost_address (data, group, cand);
5689
5690     case USE_COMPARE:
5691       return determine_group_iv_cost_cond (data, group, cand);
5692
5693     default:
5694       gcc_unreachable ();
5695     }
5696 }
5697
5698 /* Return true if get_computation_cost indicates that autoincrement is
5699    a possibility for the pair of USE and CAND, false otherwise.  */
5700
5701 static bool
5702 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5703                            struct iv_cand *cand)
5704 {
5705   if (!address_p (use->type))
5706     return false;
5707
5708   bool can_autoinc = false;
5709   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5710   return can_autoinc;
5711 }
5712
5713 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5714    use that allows autoincrement, and set their AINC_USE if possible.  */
5715
5716 static void
5717 set_autoinc_for_original_candidates (struct ivopts_data *data)
5718 {
5719   unsigned i, j;
5720
5721   for (i = 0; i < data->vcands.length (); i++)
5722     {
5723       struct iv_cand *cand = data->vcands[i];
5724       struct iv_use *closest_before = NULL;
5725       struct iv_use *closest_after = NULL;
5726       if (cand->pos != IP_ORIGINAL)
5727         continue;
5728
5729       for (j = 0; j < data->vgroups.length (); j++)
5730         {
5731           struct iv_group *group = data->vgroups[j];
5732           struct iv_use *use = group->vuses[0];
5733           unsigned uid = gimple_uid (use->stmt);
5734
5735           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5736             continue;
5737
5738           if (uid < gimple_uid (cand->incremented_at)
5739               && (closest_before == NULL
5740                   || uid > gimple_uid (closest_before->stmt)))
5741             closest_before = use;
5742
5743           if (uid > gimple_uid (cand->incremented_at)
5744               && (closest_after == NULL
5745                   || uid < gimple_uid (closest_after->stmt)))
5746             closest_after = use;
5747         }
5748
5749       if (closest_before != NULL
5750           && autoinc_possible_for_pair (data, closest_before, cand))
5751         cand->ainc_use = closest_before;
5752       else if (closest_after != NULL
5753                && autoinc_possible_for_pair (data, closest_after, cand))
5754         cand->ainc_use = closest_after;
5755     }
5756 }
5757
5758 /* Relate compare use with all candidates.  */
5759
5760 static void
5761 relate_compare_use_with_all_cands (struct ivopts_data *data)
5762 {
5763   unsigned i, count = data->vcands.length ();
5764   for (i = 0; i < data->vgroups.length (); i++)
5765     {
5766       struct iv_group *group = data->vgroups[i];
5767
5768       if (group->type == USE_COMPARE)
5769         bitmap_set_range (group->related_cands, 0, count);
5770     }
5771 }
5772
5773 /* If PREFERRED_MODE is suitable and profitable, use the preferred
5774    PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1.  */
5775
5776 static tree
5777 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5778                              const widest_int &iterations_max)
5779 {
5780   tree ntype = TREE_TYPE (niter);
5781   tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5782   if (!pref_type)
5783     return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5784                         build_int_cst (ntype, 1));
5785
5786   gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5787
5788   int prec = TYPE_PRECISION (ntype);
5789   int pref_prec = TYPE_PRECISION (pref_type);
5790
5791   tree base;
5792
5793   /* Check if the PREFERRED_MODED is able to present niter.  */
5794   if (pref_prec > prec
5795       || wi::ltu_p (iterations_max,
5796                     widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5797                                       UNSIGNED)))
5798     {
5799       /* No wrap, it is safe to use preferred type after niter + 1.  */
5800       if (wi::ltu_p (iterations_max,
5801                      widest_int::from (wi::max_value (prec, UNSIGNED),
5802                                        UNSIGNED)))
5803         {
5804           /* This could help to optimize "-1 +1" pair when niter looks
5805              like "n-1": n is in original mode.  "base = (n - 1) + 1"
5806              in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n.  */
5807           base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5808                               build_int_cst (ntype, 1));
5809           base = fold_convert (pref_type, base);
5810         }
5811
5812       /* To avoid wrap, convert niter to preferred type before plus 1.  */
5813       else
5814         {
5815           niter = fold_convert (pref_type, niter);
5816           base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5817                               build_int_cst (pref_type, 1));
5818         }
5819     }
5820   else
5821     base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5822                         build_int_cst (ntype, 1));
5823   return base;
5824 }
5825
5826 /* Add one doloop dedicated IV candidate:
5827      - Base is (may_be_zero ? 1 : (niter + 1)).
5828      - Step is -1.  */
5829
5830 static void
5831 add_iv_candidate_for_doloop (struct ivopts_data *data)
5832 {
5833   tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5834   gcc_assert (niter_desc && niter_desc->assumptions);
5835
5836   tree niter = niter_desc->niter;
5837   tree ntype = TREE_TYPE (niter);
5838   gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5839
5840   tree may_be_zero = niter_desc->may_be_zero;
5841   if (may_be_zero && integer_zerop (may_be_zero))
5842     may_be_zero = NULL_TREE;
5843   if (may_be_zero)
5844     {
5845       if (COMPARISON_CLASS_P (may_be_zero))
5846         {
5847           niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5848                                build_int_cst (ntype, 0),
5849                                rewrite_to_non_trapping_overflow (niter));
5850         }
5851       /* Don't try to obtain the iteration count expression when may_be_zero is
5852          integer_nonzerop (actually iteration count is one) or else.  */
5853       else
5854         return;
5855     }
5856
5857   machine_mode mode = TYPE_MODE (ntype);
5858   machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5859
5860   tree base;
5861   if (mode != pref_mode)
5862     {
5863       base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5864       ntype = TREE_TYPE (base);
5865     }
5866   else
5867     base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5868                         build_int_cst (ntype, 1));
5869
5870
5871   add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5872 }
5873
5874 /* Finds the candidates for the induction variables.  */
5875
5876 static void
5877 find_iv_candidates (struct ivopts_data *data)
5878 {
5879   /* Add commonly used ivs.  */
5880   add_standard_iv_candidates (data);
5881
5882   /* Add doloop dedicated ivs.  */
5883   if (data->doloop_use_p)
5884     add_iv_candidate_for_doloop (data);
5885
5886   /* Add old induction variables.  */
5887   add_iv_candidate_for_bivs (data);
5888
5889   /* Add induction variables derived from uses.  */
5890   add_iv_candidate_for_groups (data);
5891
5892   set_autoinc_for_original_candidates (data);
5893
5894   /* Record the important candidates.  */
5895   record_important_candidates (data);
5896
5897   /* Relate compare iv_use with all candidates.  */
5898   if (!data->consider_all_candidates)
5899     relate_compare_use_with_all_cands (data);
5900
5901   if (dump_file && (dump_flags & TDF_DETAILS))
5902     {
5903       unsigned i;
5904
5905       fprintf (dump_file, "\n<Important Candidates>:\t");
5906       for (i = 0; i < data->vcands.length (); i++)
5907         if (data->vcands[i]->important)
5908           fprintf (dump_file, " %d,", data->vcands[i]->id);
5909       fprintf (dump_file, "\n");
5910
5911       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5912       for (i = 0; i < data->vgroups.length (); i++)
5913         {
5914           struct iv_group *group = data->vgroups[i];
5915
5916           if (group->related_cands)
5917             {
5918               fprintf (dump_file, "  Group %d:\t", group->id);
5919               dump_bitmap (dump_file, group->related_cands);
5920             }
5921         }
5922       fprintf (dump_file, "\n");
5923     }
5924 }
5925
5926 /* Determines costs of computing use of iv with an iv candidate.  */
5927
5928 static void
5929 determine_group_iv_costs (struct ivopts_data *data)
5930 {
5931   unsigned i, j;
5932   struct iv_cand *cand;
5933   struct iv_group *group;
5934   bitmap to_clear = BITMAP_ALLOC (NULL);
5935
5936   alloc_use_cost_map (data);
5937
5938   for (i = 0; i < data->vgroups.length (); i++)
5939     {
5940       group = data->vgroups[i];
5941
5942       if (data->consider_all_candidates)
5943         {
5944           for (j = 0; j < data->vcands.length (); j++)
5945             {
5946               cand = data->vcands[j];
5947               determine_group_iv_cost (data, group, cand);
5948             }
5949         }
5950       else
5951         {
5952           bitmap_iterator bi;
5953
5954           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5955             {
5956               cand = data->vcands[j];
5957               if (!determine_group_iv_cost (data, group, cand))
5958                 bitmap_set_bit (to_clear, j);
5959             }
5960
5961           /* Remove the candidates for that the cost is infinite from
5962              the list of related candidates.  */
5963           bitmap_and_compl_into (group->related_cands, to_clear);
5964           bitmap_clear (to_clear);
5965         }
5966     }
5967
5968   BITMAP_FREE (to_clear);
5969
5970   if (dump_file && (dump_flags & TDF_DETAILS))
5971     {
5972       bitmap_iterator bi;
5973
5974       /* Dump invariant variables.  */
5975       fprintf (dump_file, "\n<Invariant Vars>:\n");
5976       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5977         {
5978           struct version_info *info = ver_info (data, i);
5979           if (info->inv_id)
5980             {
5981               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5982               print_generic_expr (dump_file, info->name, TDF_SLIM);
5983               fprintf (dump_file, "%s\n",
5984                        info->has_nonlin_use ? "" : "\t(eliminable)");
5985             }
5986         }
5987
5988       /* Dump invariant expressions.  */
5989       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5990       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5991
5992       for (hash_table<iv_inv_expr_hasher>::iterator it
5993            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5994            ++it)
5995         list.safe_push (*it);
5996
5997       list.qsort (sort_iv_inv_expr_ent);
5998
5999       for (i = 0; i < list.length (); ++i)
6000         {
6001           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
6002           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
6003           fprintf (dump_file, "\n");
6004         }
6005
6006       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
6007
6008       for (i = 0; i < data->vgroups.length (); i++)
6009         {
6010           group = data->vgroups[i];
6011
6012           fprintf (dump_file, "Group %d:\n", i);
6013           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
6014           for (j = 0; j < group->n_map_members; j++)
6015             {
6016               if (!group->cost_map[j].cand
6017                   || group->cost_map[j].cost.infinite_cost_p ())
6018                 continue;
6019
6020               fprintf (dump_file, "  %d\t%" PRId64 "\t%d\t",
6021                        group->cost_map[j].cand->id,
6022                        group->cost_map[j].cost.cost,
6023                        group->cost_map[j].cost.complexity);
6024               if (!group->cost_map[j].inv_exprs
6025                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
6026                 fprintf (dump_file, "NIL;\t");
6027               else
6028                 bitmap_print (dump_file,
6029                               group->cost_map[j].inv_exprs, "", ";\t");
6030               if (!group->cost_map[j].inv_vars
6031                   || bitmap_empty_p (group->cost_map[j].inv_vars))
6032                 fprintf (dump_file, "NIL;\n");
6033               else
6034                 bitmap_print (dump_file,
6035                               group->cost_map[j].inv_vars, "", "\n");
6036             }
6037
6038           fprintf (dump_file, "\n");
6039         }
6040       fprintf (dump_file, "\n");
6041     }
6042 }
6043
6044 /* Determines cost of the candidate CAND.  */
6045
6046 static void
6047 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6048 {
6049   comp_cost cost_base;
6050   int64_t cost, cost_step;
6051   tree base;
6052
6053   gcc_assert (cand->iv != NULL);
6054
6055   /* There are two costs associated with the candidate -- its increment
6056      and its initialization.  The second is almost negligible for any loop
6057      that rolls enough, so we take it just very little into account.  */
6058
6059   base = cand->iv->base;
6060   cost_base = force_var_cost (data, base, NULL);
6061   /* It will be exceptional that the iv register happens to be initialized with
6062      the proper value at no cost.  In general, there will at least be a regcopy
6063      or a const set.  */
6064   if (cost_base.cost == 0)
6065     cost_base.cost = COSTS_N_INSNS (1);
6066   /* Doloop decrement should be considered as zero cost.  */
6067   if (cand->doloop_p)
6068     cost_step = 0;
6069   else
6070     cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6071   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6072
6073   /* Prefer the original ivs unless we may gain something by replacing it.
6074      The reason is to make debugging simpler; so this is not relevant for
6075      artificial ivs created by other optimization passes.  */
6076   if ((cand->pos != IP_ORIGINAL
6077        || !SSA_NAME_VAR (cand->var_before)
6078        || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6079       /* Prefer doloop as well.  */
6080       && !cand->doloop_p)
6081     cost++;
6082
6083   /* Prefer not to insert statements into latch unless there are some
6084      already (so that we do not create unnecessary jumps).  */
6085   if (cand->pos == IP_END
6086       && empty_block_p (ip_end_pos (data->current_loop)))
6087     cost++;
6088
6089   cand->cost = cost;
6090   cand->cost_step = cost_step;
6091 }
6092
6093 /* Determines costs of computation of the candidates.  */
6094
6095 static void
6096 determine_iv_costs (struct ivopts_data *data)
6097 {
6098   unsigned i;
6099
6100   if (dump_file && (dump_flags & TDF_DETAILS))
6101     {
6102       fprintf (dump_file, "<Candidate Costs>:\n");
6103       fprintf (dump_file, "  cand\tcost\n");
6104     }
6105
6106   for (i = 0; i < data->vcands.length (); i++)
6107     {
6108       struct iv_cand *cand = data->vcands[i];
6109
6110       determine_iv_cost (data, cand);
6111
6112       if (dump_file && (dump_flags & TDF_DETAILS))
6113         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
6114     }
6115
6116   if (dump_file && (dump_flags & TDF_DETAILS))
6117     fprintf (dump_file, "\n");
6118 }
6119
6120 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6121    induction variables.  Note N_INVS includes both invariant variables and
6122    invariant expressions.  */
6123
6124 static unsigned
6125 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6126                               unsigned n_cands)
6127 {
6128   unsigned cost;
6129   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6130   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6131   bool speed = data->speed;
6132
6133   /* If there is a call in the loop body, the call-clobbered registers
6134      are not available for loop invariants.  */
6135   if (data->body_includes_call)
6136     available_regs = available_regs - target_clobbered_regs;
6137
6138   /* If we have enough registers.  */
6139   if (regs_needed + target_res_regs < available_regs)
6140     cost = n_new;
6141   /* If close to running out of registers, try to preserve them.  */
6142   else if (regs_needed <= available_regs)
6143     cost = target_reg_cost [speed] * regs_needed;
6144   /* If we run out of available registers but the number of candidates
6145      does not, we penalize extra registers using target_spill_cost.  */
6146   else if (n_cands <= available_regs)
6147     cost = target_reg_cost [speed] * available_regs
6148            + target_spill_cost [speed] * (regs_needed - available_regs);
6149   /* If the number of candidates runs out available registers, we penalize
6150      extra candidate registers using target_spill_cost * 2.  Because it is
6151      more expensive to spill induction variable than invariant.  */
6152   else
6153     cost = target_reg_cost [speed] * available_regs
6154            + target_spill_cost [speed] * (n_cands - available_regs) * 2
6155            + target_spill_cost [speed] * (regs_needed - n_cands);
6156
6157   /* Finally, add the number of candidates, so that we prefer eliminating
6158      induction variables if possible.  */
6159   return cost + n_cands;
6160 }
6161
6162 /* For each size of the induction variable set determine the penalty.  */
6163
6164 static void
6165 determine_set_costs (struct ivopts_data *data)
6166 {
6167   unsigned j, n;
6168   gphi *phi;
6169   gphi_iterator psi;
6170   tree op;
6171   class loop *loop = data->current_loop;
6172   bitmap_iterator bi;
6173
6174   if (dump_file && (dump_flags & TDF_DETAILS))
6175     {
6176       fprintf (dump_file, "<Global Costs>:\n");
6177       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6178       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6179       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6180       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6181     }
6182
6183   n = 0;
6184   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6185     {
6186       phi = psi.phi ();
6187       op = PHI_RESULT (phi);
6188
6189       if (virtual_operand_p (op))
6190         continue;
6191
6192       if (get_iv (data, op))
6193         continue;
6194
6195       if (!POINTER_TYPE_P (TREE_TYPE (op))
6196           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6197         continue;
6198
6199       n++;
6200     }
6201
6202   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6203     {
6204       struct version_info *info = ver_info (data, j);
6205
6206       if (info->inv_id && info->has_nonlin_use)
6207         n++;
6208     }
6209
6210   data->regs_used = n;
6211   if (dump_file && (dump_flags & TDF_DETAILS))
6212     fprintf (dump_file, "  regs_used %d\n", n);
6213
6214   if (dump_file && (dump_flags & TDF_DETAILS))
6215     {
6216       fprintf (dump_file, "  cost for size:\n");
6217       fprintf (dump_file, "  ivs\tcost\n");
6218       for (j = 0; j <= 2 * target_avail_regs; j++)
6219         fprintf (dump_file, "  %d\t%d\n", j,
6220                  ivopts_estimate_reg_pressure (data, 0, j));
6221       fprintf (dump_file, "\n");
6222     }
6223 }
6224
6225 /* Returns true if A is a cheaper cost pair than B.  */
6226
6227 static bool
6228 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6229 {
6230   if (!a)
6231     return false;
6232
6233   if (!b)
6234     return true;
6235
6236   if (a->cost < b->cost)
6237     return true;
6238
6239   if (b->cost < a->cost)
6240     return false;
6241
6242   /* In case the costs are the same, prefer the cheaper candidate.  */
6243   if (a->cand->cost < b->cand->cost)
6244     return true;
6245
6246   return false;
6247 }
6248
6249 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
6250    for more expensive, equal and cheaper respectively.  */
6251
6252 static int
6253 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6254 {
6255   if (cheaper_cost_pair (a, b))
6256     return -1;
6257   if (cheaper_cost_pair (b, a))
6258     return 1;
6259
6260   return 0;
6261 }
6262
6263 /* Returns candidate by that USE is expressed in IVS.  */
6264
6265 static class cost_pair *
6266 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6267 {
6268   return ivs->cand_for_group[group->id];
6269 }
6270
6271 /* Computes the cost field of IVS structure.  */
6272
6273 static void
6274 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6275 {
6276   comp_cost cost = ivs->cand_use_cost;
6277
6278   cost += ivs->cand_cost;
6279   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6280   ivs->cost = cost;
6281 }
6282
6283 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6284    and IVS.  */
6285
6286 static void
6287 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6288 {
6289   bitmap_iterator bi;
6290   unsigned iid;
6291
6292   if (!invs)
6293     return;
6294
6295   gcc_assert (n_inv_uses != NULL);
6296   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6297     {
6298       n_inv_uses[iid]--;
6299       if (n_inv_uses[iid] == 0)
6300         ivs->n_invs--;
6301     }
6302 }
6303
6304 /* Set USE not to be expressed by any candidate in IVS.  */
6305
6306 static void
6307 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6308                  struct iv_group *group)
6309 {
6310   unsigned gid = group->id, cid;
6311   class cost_pair *cp;
6312
6313   cp = ivs->cand_for_group[gid];
6314   if (!cp)
6315     return;
6316   cid = cp->cand->id;
6317
6318   ivs->bad_groups++;
6319   ivs->cand_for_group[gid] = NULL;
6320   ivs->n_cand_uses[cid]--;
6321
6322   if (ivs->n_cand_uses[cid] == 0)
6323     {
6324       bitmap_clear_bit (ivs->cands, cid);
6325       if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6326         ivs->n_cands--;
6327       ivs->cand_cost -= cp->cand->cost;
6328       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6329       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6330     }
6331
6332   ivs->cand_use_cost -= cp->cost;
6333   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6334   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6335   iv_ca_recount_cost (data, ivs);
6336 }
6337
6338 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6339    IVS.  */
6340
6341 static void
6342 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6343 {
6344   bitmap_iterator bi;
6345   unsigned iid;
6346
6347   if (!invs)
6348     return;
6349
6350   gcc_assert (n_inv_uses != NULL);
6351   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6352     {
6353       n_inv_uses[iid]++;
6354       if (n_inv_uses[iid] == 1)
6355         ivs->n_invs++;
6356     }
6357 }
6358
6359 /* Set cost pair for GROUP in set IVS to CP.  */
6360
6361 static void
6362 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6363               struct iv_group *group, class cost_pair *cp)
6364 {
6365   unsigned gid = group->id, cid;
6366
6367   if (ivs->cand_for_group[gid] == cp)
6368     return;
6369
6370   if (ivs->cand_for_group[gid])
6371     iv_ca_set_no_cp (data, ivs, group);
6372
6373   if (cp)
6374     {
6375       cid = cp->cand->id;
6376
6377       ivs->bad_groups--;
6378       ivs->cand_for_group[gid] = cp;
6379       ivs->n_cand_uses[cid]++;
6380       if (ivs->n_cand_uses[cid] == 1)
6381         {
6382           bitmap_set_bit (ivs->cands, cid);
6383           if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6384             ivs->n_cands++;
6385           ivs->cand_cost += cp->cand->cost;
6386           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6387           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6388         }
6389
6390       ivs->cand_use_cost += cp->cost;
6391       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6392       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6393       iv_ca_recount_cost (data, ivs);
6394     }
6395 }
6396
6397 /* Extend set IVS by expressing USE by some of the candidates in it
6398    if possible.  Consider all important candidates if candidates in
6399    set IVS don't give any result.  */
6400
6401 static void
6402 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6403                struct iv_group *group)
6404 {
6405   class cost_pair *best_cp = NULL, *cp;
6406   bitmap_iterator bi;
6407   unsigned i;
6408   struct iv_cand *cand;
6409
6410   gcc_assert (ivs->upto >= group->id);
6411   ivs->upto++;
6412   ivs->bad_groups++;
6413
6414   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6415     {
6416       cand = data->vcands[i];
6417       cp = get_group_iv_cost (data, group, cand);
6418       if (cheaper_cost_pair (cp, best_cp))
6419         best_cp = cp;
6420     }
6421
6422   if (best_cp == NULL)
6423     {
6424       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6425         {
6426           cand = data->vcands[i];
6427           cp = get_group_iv_cost (data, group, cand);
6428           if (cheaper_cost_pair (cp, best_cp))
6429             best_cp = cp;
6430         }
6431     }
6432
6433   iv_ca_set_cp (data, ivs, group, best_cp);
6434 }
6435
6436 /* Get cost for assignment IVS.  */
6437
6438 static comp_cost
6439 iv_ca_cost (class iv_ca *ivs)
6440 {
6441   /* This was a conditional expression but it triggered a bug in
6442      Sun C 5.5.  */
6443   if (ivs->bad_groups)
6444     return infinite_cost;
6445   else
6446     return ivs->cost;
6447 }
6448
6449 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6450    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
6451    respectively.  */
6452
6453 static int
6454 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6455                     struct iv_group *group, class cost_pair *old_cp,
6456                     class cost_pair *new_cp)
6457 {
6458   gcc_assert (old_cp && new_cp && old_cp != new_cp);
6459   unsigned old_n_invs = ivs->n_invs;
6460   iv_ca_set_cp (data, ivs, group, new_cp);
6461   unsigned new_n_invs = ivs->n_invs;
6462   iv_ca_set_cp (data, ivs, group, old_cp);
6463
6464   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6465 }
6466
6467 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6468    it before NEXT.  */
6469
6470 static struct iv_ca_delta *
6471 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6472                  class cost_pair *new_cp, struct iv_ca_delta *next)
6473 {
6474   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6475
6476   change->group = group;
6477   change->old_cp = old_cp;
6478   change->new_cp = new_cp;
6479   change->next = next;
6480
6481   return change;
6482 }
6483
6484 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6485    are rewritten.  */
6486
6487 static struct iv_ca_delta *
6488 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6489 {
6490   struct iv_ca_delta *last;
6491
6492   if (!l2)
6493     return l1;
6494
6495   if (!l1)
6496     return l2;
6497
6498   for (last = l1; last->next; last = last->next)
6499     continue;
6500   last->next = l2;
6501
6502   return l1;
6503 }
6504
6505 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6506
6507 static struct iv_ca_delta *
6508 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6509 {
6510   struct iv_ca_delta *act, *next, *prev = NULL;
6511
6512   for (act = delta; act; act = next)
6513     {
6514       next = act->next;
6515       act->next = prev;
6516       prev = act;
6517
6518       std::swap (act->old_cp, act->new_cp);
6519     }
6520
6521   return prev;
6522 }
6523
6524 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6525    reverted instead.  */
6526
6527 static void
6528 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6529                     struct iv_ca_delta *delta, bool forward)
6530 {
6531   class cost_pair *from, *to;
6532   struct iv_ca_delta *act;
6533
6534   if (!forward)
6535     delta = iv_ca_delta_reverse (delta);
6536
6537   for (act = delta; act; act = act->next)
6538     {
6539       from = act->old_cp;
6540       to = act->new_cp;
6541       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6542       iv_ca_set_cp (data, ivs, act->group, to);
6543     }
6544
6545   if (!forward)
6546     iv_ca_delta_reverse (delta);
6547 }
6548
6549 /* Returns true if CAND is used in IVS.  */
6550
6551 static bool
6552 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6553 {
6554   return ivs->n_cand_uses[cand->id] > 0;
6555 }
6556
/* Returns number of induction variable candidates in the set IVS.  This is
   the IVS->n_cands counter maintained by iv_ca_set_cp, which leaves a
   candidate uncounted when it has doloop_p set and
   targetm.have_count_reg_decr_p is true; the value can therefore be smaller
   than the number of bits set in IVS->cands.  */

static unsigned
iv_ca_n_cands (class iv_ca *ivs)
{
  return ivs->n_cands;
}
6564
6565 /* Free the list of changes DELTA.  */
6566
6567 static void
6568 iv_ca_delta_free (struct iv_ca_delta **delta)
6569 {
6570   struct iv_ca_delta *act, *next;
6571
6572   for (act = *delta; act; act = next)
6573     {
6574       next = act->next;
6575       free (act);
6576     }
6577
6578   *delta = NULL;
6579 }
6580
6581 /* Allocates new iv candidates assignment.  */
6582
6583 static class iv_ca *
6584 iv_ca_new (struct ivopts_data *data)
6585 {
6586   class iv_ca *nw = XNEW (class iv_ca);
6587
6588   nw->upto = 0;
6589   nw->bad_groups = 0;
6590   nw->cand_for_group = XCNEWVEC (class cost_pair *,
6591                                  data->vgroups.length ());
6592   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6593   nw->cands = BITMAP_ALLOC (NULL);
6594   nw->n_cands = 0;
6595   nw->n_invs = 0;
6596   nw->cand_use_cost = no_cost;
6597   nw->cand_cost = 0;
6598   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6599   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6600   nw->cost = no_cost;
6601
6602   return nw;
6603 }
6604
6605 /* Free memory occupied by the set IVS.  */
6606
6607 static void
6608 iv_ca_free (class iv_ca **ivs)
6609 {
6610   free ((*ivs)->cand_for_group);
6611   free ((*ivs)->n_cand_uses);
6612   BITMAP_FREE ((*ivs)->cands);
6613   free ((*ivs)->n_inv_var_uses);
6614   free ((*ivs)->n_inv_expr_uses);
6615   free (*ivs);
6616   *ivs = NULL;
6617 }
6618
6619 /* Dumps IVS to FILE.  */
6620
6621 static void
6622 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6623 {
6624   unsigned i;
6625   comp_cost cost = iv_ca_cost (ivs);
6626
6627   fprintf (file, "  cost: %" PRId64 " (complexity %d)\n", cost.cost,
6628            cost.complexity);
6629   fprintf (file, "  reg_cost: %d\n",
6630            ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6631   fprintf (file, "  cand_cost: %" PRId64 "\n  cand_group_cost: "
6632            "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6633            ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6634   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6635
6636   for (i = 0; i < ivs->upto; i++)
6637     {
6638       struct iv_group *group = data->vgroups[i];
6639       class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6640       if (cp)
6641         fprintf (file, "   group:%d --> iv_cand:%d, cost=("
6642                  "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6643                  cp->cost.cost, cp->cost.complexity);
6644       else
6645         fprintf (file, "   group:%d --> ??\n", group->id);
6646     }
6647
6648   const char *pref = "";
6649   fprintf (file, "  invariant variables: ");
6650   for (i = 1; i <= data->max_inv_var_id; i++)
6651     if (ivs->n_inv_var_uses[i])
6652       {
6653         fprintf (file, "%s%d", pref, i);
6654         pref = ", ";
6655       }
6656
6657   pref = "";
6658   fprintf (file, "\n  invariant expressions: ");
6659   for (i = 1; i <= data->max_inv_expr_id; i++)
6660     if (ivs->n_inv_expr_uses[i])
6661       {
6662         fprintf (file, "%s%d", pref, i);
6663         pref = ", ";
6664       }
6665
6666   fprintf (file, "\n\n");
6667 }
6668
6669 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6670    new set, and store differences in DELTA.  Number of induction variables
6671    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6672    the function will try to find a solution with mimimal iv candidates.  */
6673
6674 static comp_cost
6675 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6676               struct iv_cand *cand, struct iv_ca_delta **delta,
6677               unsigned *n_ivs, bool min_ncand)
6678 {
6679   unsigned i;
6680   comp_cost cost;
6681   struct iv_group *group;
6682   class cost_pair *old_cp, *new_cp;
6683
6684   *delta = NULL;
6685   for (i = 0; i < ivs->upto; i++)
6686     {
6687       group = data->vgroups[i];
6688       old_cp = iv_ca_cand_for_group (ivs, group);
6689
6690       if (old_cp
6691           && old_cp->cand == cand)
6692         continue;
6693
6694       new_cp = get_group_iv_cost (data, group, cand);
6695       if (!new_cp)
6696         continue;
6697
6698       if (!min_ncand)
6699         {
6700           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6701           /* Skip if new_cp depends on more invariants.  */
6702           if (cmp_invs > 0)
6703             continue;
6704
6705           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6706           /* Skip if new_cp is not cheaper.  */
6707           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6708             continue;
6709         }
6710
6711       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6712     }
6713
6714   iv_ca_delta_commit (data, ivs, *delta, true);
6715   cost = iv_ca_cost (ivs);
6716   if (n_ivs)
6717     *n_ivs = iv_ca_n_cands (ivs);
6718   iv_ca_delta_commit (data, ivs, *delta, false);
6719
6720   return cost;
6721 }
6722
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.

   For each group currently expressed by CAND, the replacement defaults to
   the cost pair get_group_iv_cost yields for START.  Every other candidate
   in IVS is then tried in turn, and one that brings the cost of the whole
   set below the current best is preferred.  If some group has no
   replacement at all, *DELTA is freed and infinite_cost is returned.

   IVS is unchanged on return: each trial assignment is undone, and the final
   delta is committed only to measure the cost and then reverted.  The list
   stored in *DELTA is newly allocated.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
              struct iv_cand *cand, struct iv_cand *start,
              struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  class cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      /* NOTE(review): old_cp is dereferenced without a NULL check, so every
         group is presumably expected to have a cost pair in IVS here --
         confirm against the callers.  */
      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
        continue;

      /* The cost to beat is that of IVS with GROUP still expressed by
         CAND.  */
      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
        {
          /* Try every other candidate currently in the set.  */
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = data->vcands[ci];

              cp = get_group_iv_cost (data, group, cnd);
              if (!cp)
                continue;

              /* Tentatively express GROUP by CND and look at the cost of
                 the whole set.  */
              iv_ca_set_cp (data, ivs, group, cp);
              acost = iv_ca_cost (ivs);

              if (acost < best_cost)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      else
        {
          /* Same as above, but restricted to candidates in the set that are
             also in GROUP's related_cands bitmap.  */
          EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = data->vcands[ci];

              cp = get_group_iv_cost (data, group, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, group, cp);
              acost = iv_ca_cost (ivs);

              if (acost < best_cost)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      /* GROUP cannot be expressed without CAND, so CAND cannot be
         removed.  */
      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the narrowed set, then put IVS back.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
6816
6817 /* Try optimizing the set of candidates IVS by removing candidates different
6818    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6819    differences in DELTA.  */
6820
6821 static comp_cost
6822 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6823              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6824 {
6825   bitmap_iterator bi;
6826   struct iv_ca_delta *act_delta, *best_delta;
6827   unsigned i;
6828   comp_cost best_cost, acost;
6829   struct iv_cand *cand;
6830
6831   best_delta = NULL;
6832   best_cost = iv_ca_cost (ivs);
6833
6834   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6835     {
6836       cand = data->vcands[i];
6837
6838       if (cand == except_cand)
6839         continue;
6840
6841       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6842
6843       if (acost < best_cost)
6844         {
6845           best_cost = acost;
6846           iv_ca_delta_free (&best_delta);
6847           best_delta = act_delta;
6848         }
6849       else
6850         iv_ca_delta_free (&act_delta);
6851     }
6852
6853   if (!best_delta)
6854     {
6855       *delta = NULL;
6856       return best_cost;
6857     }
6858
6859   /* Recurse to possibly remove other unnecessary ivs.  */
6860   iv_ca_delta_commit (data, ivs, best_delta, true);
6861   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6862   iv_ca_delta_commit (data, ivs, best_delta, false);
6863   *delta = iv_ca_delta_join (best_delta, *delta);
6864   return best_cost;
6865 }
6866
6867 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6868    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6869    the corresponding cost_pair, otherwise just return BEST_CP.  */
6870
6871 static class cost_pair*
6872 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6873                         unsigned int cand_idx, struct iv_cand *old_cand,
6874                         class cost_pair *best_cp)
6875 {
6876   struct iv_cand *cand;
6877   class cost_pair *cp;
6878
6879   gcc_assert (old_cand != NULL && best_cp != NULL);
6880   if (cand_idx == old_cand->id)
6881     return best_cp;
6882
6883   cand = data->vcands[cand_idx];
6884   cp = get_group_iv_cost (data, group, cand);
6885   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6886     return cp;
6887
6888   return best_cp;
6889 }
6890
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv uses.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in list DELTA.  Otherwise *DELTA is NULL and the
   original cost of IVS is returned.  IVS itself is unchanged on return;
   every trial is committed and then reverted.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
               struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  class cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      /* Only candidates shared by several groups are of interest, and
         heavily shared ones (more than ALWAYS_PRUNE_CAND_SET_BOUND uses)
         are skipped.  */
      if (ivs->n_cand_uses[i] == 1
          || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
        continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /*  Represent uses under current candidate using other ones with
          lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
        {
          struct iv_group *group = data->vgroups[j];
          old_cp = iv_ca_cand_for_group (ivs, group);

          if (old_cp->cand != cand)
            continue;

          /* Look for the locally cheapest cost pair among all candidates,
             or among the group's related candidates only.  */
          best_cp = old_cp;
          if (data->consider_all_candidates)
            for (k = 0; k < data->vcands.length (); k++)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);
          else
            EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);

          /* Nothing locally cheaper than the current candidate.  */
          if (best_cp == old_cp)
            continue;

          act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
        }
      /* No need for further prune.  */
      if (!act_delta)
        continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      /* The first replacement that beats the original cost is taken.  */
      if (acost < orig_cost)
        {
          *delta = act_delta;
          return acost;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
6967
6968 /* Tries to extend the sets IVS in the best possible way in order to
6969    express the GROUP.  If ORIGINALP is true, prefer candidates from
6970    the original set of IVs, otherwise favor important candidates not
6971    based on any memory object.  */
6972
6973 static bool
6974 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6975                   struct iv_group *group, bool originalp)
6976 {
6977   comp_cost best_cost, act_cost;
6978   unsigned i;
6979   bitmap_iterator bi;
6980   struct iv_cand *cand;
6981   struct iv_ca_delta *best_delta = NULL, *act_delta;
6982   class cost_pair *cp;
6983
6984   iv_ca_add_group (data, ivs, group);
6985   best_cost = iv_ca_cost (ivs);
6986   cp = iv_ca_cand_for_group (ivs, group);
6987   if (cp)
6988     {
6989       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6990       iv_ca_set_no_cp (data, ivs, group);
6991     }
6992
6993   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6994      first try important candidates not based on any memory object.  Only if
6995      this fails, try the specific ones.  Rationale -- in loops with many
6996      variables the best choice often is to use just one generic biv.  If we
6997      added here many ivs specific to the uses, the optimization algorithm later
6998      would be likely to get stuck in a local minimum, thus causing us to create
6999      too many ivs.  The approach from few ivs to more seems more likely to be
7000      successful -- starting from few ivs, replacing an expensive use by a
7001      specific iv should always be a win.  */
7002   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
7003     {
7004       cand = data->vcands[i];
7005
7006       if (originalp && cand->pos !=IP_ORIGINAL)
7007         continue;
7008
7009       if (!originalp && cand->iv->base_object != NULL_TREE)
7010         continue;
7011
7012       if (iv_ca_cand_used_p (ivs, cand))
7013         continue;
7014
7015       cp = get_group_iv_cost (data, group, cand);
7016       if (!cp)
7017         continue;
7018
7019       iv_ca_set_cp (data, ivs, group, cp);
7020       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
7021                                true);
7022       iv_ca_set_no_cp (data, ivs, group);
7023       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
7024
7025       if (act_cost < best_cost)
7026         {
7027           best_cost = act_cost;
7028
7029           iv_ca_delta_free (&best_delta);
7030           best_delta = act_delta;
7031         }
7032       else
7033         iv_ca_delta_free (&act_delta);
7034     }
7035
7036   if (best_cost.infinite_cost_p ())
7037     {
7038       for (i = 0; i < group->n_map_members; i++)
7039         {
7040           cp = group->cost_map + i;
7041           cand = cp->cand;
7042           if (!cand)
7043             continue;
7044
7045           /* Already tried this.  */
7046           if (cand->important)
7047             {
7048               if (originalp && cand->pos == IP_ORIGINAL)
7049                 continue;
7050               if (!originalp && cand->iv->base_object == NULL_TREE)
7051                 continue;
7052             }
7053
7054           if (iv_ca_cand_used_p (ivs, cand))
7055             continue;
7056
7057           act_delta = NULL;
7058           iv_ca_set_cp (data, ivs, group, cp);
7059           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7060           iv_ca_set_no_cp (data, ivs, group);
7061           act_delta = iv_ca_delta_add (group,
7062                                        iv_ca_cand_for_group (ivs, group),
7063                                        cp, act_delta);
7064
7065           if (act_cost < best_cost)
7066             {
7067               best_cost = act_cost;
7068
7069               if (best_delta)
7070                 iv_ca_delta_free (&best_delta);
7071               best_delta = act_delta;
7072             }
7073           else
7074             iv_ca_delta_free (&act_delta);
7075         }
7076     }
7077
7078   iv_ca_delta_commit (data, ivs, best_delta, true);
7079   iv_ca_delta_free (&best_delta);
7080
7081   return !best_cost.infinite_cost_p ();
7082 }
7083
7084 /* Finds an initial assignment of candidates to uses.  */
7085
7086 static class iv_ca *
7087 get_initial_solution (struct ivopts_data *data, bool originalp)
7088 {
7089   unsigned i;
7090   class iv_ca *ivs = iv_ca_new (data);
7091
7092   for (i = 0; i < data->vgroups.length (); i++)
7093     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7094       {
7095         iv_ca_free (&ivs);
7096         return NULL;
7097       }
7098
7099   return ivs;
7100 }
7101
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable, this function tries to break local
   optimal fixed-point by replacing candidates in IVS if it's true.
   *TRY_REPLACE_P is cleared once that replacement has been attempted.

   Returns false if no improving change was found, in which case IVS is
   left as it was.  Otherwise the best change is committed to IVS and the
   return value tells whether the cost of IVS after the commit equals the
   cost that was predicted for the change.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
                    class iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      /* No group would switch to CAND.  */
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      /* Keep only the best change seen so far.  */
      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
        {
          *try_replace_p = false;
          /* So far candidate selecting algorithm tends to choose fewer IVs
             so that it can handle cases in which loops have many variables
             but the best choice is often to use only one general biv.  One
             weakness is it can't handle opposite cases, in which different
             candidates should be chosen with respect to each use.  To solve
             the problem, we replace candidates in a manner described by the
             comments of iv_ca_replace, thus give general algorithm a chance
             to break local optimal fixed-point in these cases.  */
          best_cost = iv_ca_replace (data, ivs, &best_delta);
        }

      if (!best_delta)
        return false;
    }

  /* Apply the winning change for real; the delta is no longer needed.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);
  return best_cost == iv_ca_cost (ivs);
}
7174
7175 /* Attempts to find the optimal set of induction variables.  We do simple
7176    greedy heuristic -- we try to replace at most one candidate in the selected
7177    solution and remove the unused ivs while this improves the cost.  */
7178
7179 static class iv_ca *
7180 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7181 {
7182   class iv_ca *set;
7183   bool try_replace_p = true;
7184
7185   /* Get the initial solution.  */
7186   set = get_initial_solution (data, originalp);
7187   if (!set)
7188     {
7189       if (dump_file && (dump_flags & TDF_DETAILS))
7190         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7191       return NULL;
7192     }
7193
7194   if (dump_file && (dump_flags & TDF_DETAILS))
7195     {
7196       fprintf (dump_file, "Initial set of candidates:\n");
7197       iv_ca_dump (data, dump_file, set);
7198     }
7199
7200   while (try_improve_iv_set (data, set, &try_replace_p))
7201     {
7202       if (dump_file && (dump_flags & TDF_DETAILS))
7203         {
7204           fprintf (dump_file, "Improved to:\n");
7205           iv_ca_dump (data, dump_file, set);
7206         }
7207     }
7208
7209   /* If the set has infinite_cost, it can't be optimal.  */
7210   if (iv_ca_cost (set).infinite_cost_p ())
7211     {
7212       if (dump_file && (dump_flags & TDF_DETAILS))
7213         fprintf (dump_file,
7214                  "Overflow to infinite cost in try_improve_iv_set.\n");
7215       iv_ca_free (&set);
7216     }
7217   return set;
7218 }
7219
7220 static class iv_ca *
7221 find_optimal_iv_set (struct ivopts_data *data)
7222 {
7223   unsigned i;
7224   comp_cost cost, origcost;
7225   class iv_ca *set, *origset;
7226
7227   /* Determine the cost based on a strategy that starts with original IVs,
7228      and try again using a strategy that prefers candidates not based
7229      on any IVs.  */
7230   origset = find_optimal_iv_set_1 (data, true);
7231   set = find_optimal_iv_set_1 (data, false);
7232
7233   if (!origset && !set)
7234     return NULL;
7235
7236   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7237   cost = set ? iv_ca_cost (set) : infinite_cost;
7238
7239   if (dump_file && (dump_flags & TDF_DETAILS))
7240     {
7241       fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7242                origcost.cost, origcost.complexity);
7243       fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7244                cost.cost, cost.complexity);
7245     }
7246
7247   /* Choose the one with the best cost.  */
7248   if (origcost <= cost)
7249     {
7250       if (set)
7251         iv_ca_free (&set);
7252       set = origset;
7253     }
7254   else if (origset)
7255     iv_ca_free (&origset);
7256
7257   for (i = 0; i < data->vgroups.length (); i++)
7258     {
7259       struct iv_group *group = data->vgroups[i];
7260       group->selected = iv_ca_cand_for_group (set, group)->cand;
7261     }
7262
7263   return set;
7264 }
7265
7266 /* Creates a new induction variable corresponding to CAND.  */
7267
7268 static void
7269 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7270 {
7271   gimple_stmt_iterator incr_pos;
7272   tree base;
7273   struct iv_use *use;
7274   struct iv_group *group;
7275   bool after = false;
7276
7277   gcc_assert (cand->iv != NULL);
7278
7279   switch (cand->pos)
7280     {
7281     case IP_NORMAL:
7282       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7283       break;
7284
7285     case IP_END:
7286       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7287       after = true;
7288       if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7289         {
7290           edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7291           incr_pos = gsi_after_labels (split_edge (e));
7292           after = false;
7293         }
7294       break;
7295
7296     case IP_AFTER_USE:
7297       after = true;
7298       /* fall through */
7299     case IP_BEFORE_USE:
7300       incr_pos = gsi_for_stmt (cand->incremented_at);
7301       break;
7302
7303     case IP_ORIGINAL:
7304       /* Mark that the iv is preserved.  */
7305       name_info (data, cand->var_before)->preserve_biv = true;
7306       name_info (data, cand->var_after)->preserve_biv = true;
7307
7308       /* Rewrite the increment so that it uses var_before directly.  */
7309       use = find_interesting_uses_op (data, cand->var_after);
7310       group = data->vgroups[use->group_id];
7311       group->selected = cand;
7312       return;
7313     }
7314
7315   gimple_add_tmp_var (cand->var_before);
7316
7317   base = unshare_expr (cand->iv->base);
7318
7319   create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7320              cand->var_before, data->current_loop,
7321              &incr_pos, after, &cand->var_before, &cand->var_after);
7322 }
7323
7324 /* Creates new induction variables described in SET.  */
7325
7326 static void
7327 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7328 {
7329   unsigned i;
7330   struct iv_cand *cand;
7331   bitmap_iterator bi;
7332
7333   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7334     {
7335       cand = data->vcands[i];
7336       create_new_iv (data, cand);
7337     }
7338
7339   if (dump_file && (dump_flags & TDF_DETAILS))
7340     {
7341       fprintf (dump_file, "Selected IV set for loop %d",
7342                data->current_loop->num);
7343       if (data->loop_loc != UNKNOWN_LOCATION)
7344         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7345                  LOCATION_LINE (data->loop_loc));
7346       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7347                avg_loop_niter (data->current_loop));
7348       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7349       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7350         {
7351           cand = data->vcands[i];
7352           dump_cand (dump_file, cand);
7353         }
7354       fprintf (dump_file, "\n");
7355     }
7356 }
7357
7358 /* Rewrites USE (definition of iv used in a nonlinear expression)
7359    using candidate CAND.  */
7360
7361 static void
7362 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7363                             struct iv_use *use, struct iv_cand *cand)
7364 {
7365   gassign *ass;
7366   gimple_stmt_iterator bsi;
7367   tree comp, type = get_use_type (use), tgt;
7368
7369   /* An important special case -- if we are asked to express value of
7370      the original iv by itself, just exit; there is no need to
7371      introduce a new computation (that might also need casting the
7372      variable to unsigned and back).  */
7373   if (cand->pos == IP_ORIGINAL
7374       && cand->incremented_at == use->stmt)
7375     {
7376       tree op = NULL_TREE;
7377       enum tree_code stmt_code;
7378
7379       gcc_assert (is_gimple_assign (use->stmt));
7380       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7381
7382       /* Check whether we may leave the computation unchanged.
7383          This is the case only if it does not rely on other
7384          computations in the loop -- otherwise, the computation
7385          we rely upon may be removed in remove_unused_ivs,
7386          thus leading to ICE.  */
7387       stmt_code = gimple_assign_rhs_code (use->stmt);
7388       if (stmt_code == PLUS_EXPR
7389           || stmt_code == MINUS_EXPR
7390           || stmt_code == POINTER_PLUS_EXPR)
7391         {
7392           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7393             op = gimple_assign_rhs2 (use->stmt);
7394           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7395             op = gimple_assign_rhs1 (use->stmt);
7396         }
7397
7398       if (op != NULL_TREE)
7399         {
7400           if (expr_invariant_in_loop_p (data->current_loop, op))
7401             return;
7402           if (TREE_CODE (op) == SSA_NAME)
7403             {
7404               struct iv *iv = get_iv (data, op);
7405               if (iv != NULL && integer_zerop (iv->step))
7406                 return;
7407             }
7408         }
7409     }
7410
7411   switch (gimple_code (use->stmt))
7412     {
7413     case GIMPLE_PHI:
7414       tgt = PHI_RESULT (use->stmt);
7415
7416       /* If we should keep the biv, do not replace it.  */
7417       if (name_info (data, tgt)->preserve_biv)
7418         return;
7419
7420       bsi = gsi_after_labels (gimple_bb (use->stmt));
7421       break;
7422
7423     case GIMPLE_ASSIGN:
7424       tgt = gimple_assign_lhs (use->stmt);
7425       bsi = gsi_for_stmt (use->stmt);
7426       break;
7427
7428     default:
7429       gcc_unreachable ();
7430     }
7431
7432   aff_tree aff_inv, aff_var;
7433   if (!get_computation_aff_1 (data->current_loop, use->stmt,
7434                               use, cand, &aff_inv, &aff_var))
7435     gcc_unreachable ();
7436
7437   unshare_aff_combination (&aff_inv);
7438   unshare_aff_combination (&aff_var);
7439   /* Prefer CSE opportunity than loop invariant by adding offset at last
7440      so that iv_uses have different offsets can be CSEed.  */
7441   poly_widest_int offset = aff_inv.offset;
7442   aff_inv.offset = 0;
7443
7444   gimple_seq stmt_list = NULL, seq = NULL;
7445   tree comp_op1 = aff_combination_to_tree (&aff_inv);
7446   tree comp_op2 = aff_combination_to_tree (&aff_var);
7447   gcc_assert (comp_op1 && comp_op2);
7448
7449   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7450   gimple_seq_add_seq (&stmt_list, seq);
7451   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7452   gimple_seq_add_seq (&stmt_list, seq);
7453
7454   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7455     std::swap (comp_op1, comp_op2);
7456
7457   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7458     {
7459       comp = fold_build_pointer_plus (comp_op1,
7460                                       fold_convert (sizetype, comp_op2));
7461       comp = fold_build_pointer_plus (comp,
7462                                       wide_int_to_tree (sizetype, offset));
7463     }
7464   else
7465     {
7466       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7467                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
7468       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7469                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7470     }
7471
7472   comp = fold_convert (type, comp);
7473   comp = force_gimple_operand (comp, &seq, false, NULL);
7474   gimple_seq_add_seq (&stmt_list, seq);
7475   if (gimple_code (use->stmt) != GIMPLE_PHI
7476       /* We can't allow re-allocating the stmt as it might be pointed
7477          to still.  */
7478       && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7479           >= gimple_num_ops (gsi_stmt (bsi))))
7480     {
7481       comp = force_gimple_operand (comp, &seq, true, NULL);
7482       gimple_seq_add_seq (&stmt_list, seq);
7483       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7484         {
7485           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7486           /* As this isn't a plain copy we have to reset alignment
7487              information.  */
7488           if (SSA_NAME_PTR_INFO (comp))
7489             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7490         }
7491     }
7492
7493   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7494   if (gimple_code (use->stmt) == GIMPLE_PHI)
7495     {
7496       ass = gimple_build_assign (tgt, comp);
7497       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7498
7499       bsi = gsi_for_stmt (use->stmt);
7500       remove_phi_node (&bsi, false);
7501     }
7502   else
7503     {
7504       gimple_assign_set_rhs_from_tree (&bsi, comp);
7505       use->stmt = gsi_stmt (bsi);
7506     }
7507 }
7508
7509 /* Performs a peephole optimization to reorder the iv update statement with
7510    a mem ref to enable instruction combining in later phases. The mem ref uses
7511    the iv value before the update, so the reordering transformation requires
7512    adjustment of the offset. CAND is the selected IV_CAND.
7513
7514    Example:
7515
7516    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7517    iv2 = iv1 + 1;
7518
7519    if (t < val)      (1)
7520      goto L;
7521    goto Head;
7522
7523
7524    directly propagating t over to (1) will introduce overlapping live range
7525    thus increase register pressure. This peephole transform it into:
7526
7527
7528    iv2 = iv1 + 1;
7529    t = MEM_REF (base, iv2, 8, 8);
7530    if (t < val)
7531      goto L;
7532    goto Head;
7533 */
7534
7535 static void
7536 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7537 {
7538   tree var_after;
7539   gimple *iv_update, *stmt;
7540   basic_block bb;
7541   gimple_stmt_iterator gsi, gsi_iv;
7542
7543   if (cand->pos != IP_NORMAL)
7544     return;
7545
7546   var_after = cand->var_after;
7547   iv_update = SSA_NAME_DEF_STMT (var_after);
7548
7549   bb = gimple_bb (iv_update);
7550   gsi = gsi_last_nondebug_bb (bb);
7551   stmt = gsi_stmt (gsi);
7552
7553   /* Only handle conditional statement for now.  */
7554   if (gimple_code (stmt) != GIMPLE_COND)
7555     return;
7556
7557   gsi_prev_nondebug (&gsi);
7558   stmt = gsi_stmt (gsi);
7559   if (stmt != iv_update)
7560     return;
7561
7562   gsi_prev_nondebug (&gsi);
7563   if (gsi_end_p (gsi))
7564     return;
7565
7566   stmt = gsi_stmt (gsi);
7567   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7568     return;
7569
7570   if (stmt != use->stmt)
7571     return;
7572
7573   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7574     return;
7575
7576   if (dump_file && (dump_flags & TDF_DETAILS))
7577     {
7578       fprintf (dump_file, "Reordering \n");
7579       print_gimple_stmt (dump_file, iv_update, 0);
7580       print_gimple_stmt (dump_file, use->stmt, 0);
7581       fprintf (dump_file, "\n");
7582     }
7583
7584   gsi = gsi_for_stmt (use->stmt);
7585   gsi_iv = gsi_for_stmt (iv_update);
7586   gsi_move_before (&gsi_iv, &gsi);
7587
7588   cand->pos = IP_BEFORE_USE;
7589   cand->incremented_at = use->stmt;
7590 }
7591
7592 /* Return the alias pointer type that should be used for a MEM_REF
7593    associated with USE, which has type USE_PTR_ADDRESS.  */
7594
7595 static tree
7596 get_alias_ptr_type_for_ptr_address (iv_use *use)
7597 {
7598   gcall *call = as_a <gcall *> (use->stmt);
7599   switch (gimple_call_internal_fn (call))
7600     {
7601     case IFN_MASK_LOAD:
7602     case IFN_MASK_STORE:
7603     case IFN_MASK_LOAD_LANES:
7604     case IFN_MASK_STORE_LANES:
7605     case IFN_MASK_LEN_LOAD_LANES:
7606     case IFN_MASK_LEN_STORE_LANES:
7607     case IFN_LEN_LOAD:
7608     case IFN_LEN_STORE:
7609     case IFN_MASK_LEN_LOAD:
7610     case IFN_MASK_LEN_STORE:
7611       /* The second argument contains the correct alias type.  */
7612       gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7613       return TREE_TYPE (gimple_call_arg (call, 1));
7614
7615     default:
7616       gcc_unreachable ();
7617     }
7618 }
7619
7620
7621 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7622
7623 static void
7624 rewrite_use_address (struct ivopts_data *data,
7625                      struct iv_use *use, struct iv_cand *cand)
7626 {
7627   aff_tree aff;
7628   bool ok;
7629
7630   adjust_iv_update_pos (cand, use);
7631   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7632   gcc_assert (ok);
7633   unshare_aff_combination (&aff);
7634
7635   /* To avoid undefined overflow problems, all IV candidates use unsigned
7636      integer types.  The drawback is that this makes it impossible for
7637      create_mem_ref to distinguish an IV that is based on a memory object
7638      from one that represents simply an offset.
7639
7640      To work around this problem, we pass a hint to create_mem_ref that
7641      indicates which variable (if any) in aff is an IV based on a memory
7642      object.  Note that we only consider the candidate.  If this is not
7643      based on an object, the base of the reference is in some subexpression
7644      of the use -- but these will use pointer types, so they are recognized
7645      by the create_mem_ref heuristics anyway.  */
7646   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7647   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7648   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7649   tree type = use->mem_type;
7650   tree alias_ptr_type;
7651   if (use->type == USE_PTR_ADDRESS)
7652     alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7653   else
7654     {
7655       gcc_assert (type == TREE_TYPE (*use->op_p));
7656       unsigned int align = get_object_alignment (*use->op_p);
7657       if (align != TYPE_ALIGN (type))
7658         type = build_aligned_type (type, align);
7659       alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7660     }
7661   tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7662                              iv, base_hint, data->speed);
7663
7664   if (use->type == USE_PTR_ADDRESS)
7665     {
7666       ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7667       ref = fold_convert (get_use_type (use), ref);
7668       ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7669                                       true, GSI_SAME_STMT);
7670     }
7671   else
7672     {
7673       /* When we end up confused enough and have no suitable base but
7674          stuffed everything to index2 use a LEA for the address and
7675          create a plain MEM_REF to avoid basing a memory reference
7676          on address zero which create_mem_ref_raw does as fallback.  */
7677       if (TREE_CODE (ref) == TARGET_MEM_REF
7678           && TMR_INDEX2 (ref) != NULL_TREE
7679           && integer_zerop (TREE_OPERAND (ref, 0)))
7680         {
7681           ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7682           ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7683                                           true, GSI_SAME_STMT);
7684           ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7685         }
7686       copy_ref_info (ref, *use->op_p);
7687     }
7688
7689   *use->op_p = ref;
7690 }
7691
7692 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7693    candidate CAND.  */
7694
7695 static void
7696 rewrite_use_compare (struct ivopts_data *data,
7697                      struct iv_use *use, struct iv_cand *cand)
7698 {
7699   tree comp, op, bound;
7700   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7701   enum tree_code compare;
7702   struct iv_group *group = data->vgroups[use->group_id];
7703   class cost_pair *cp = get_group_iv_cost (data, group, cand);
7704
7705   bound = cp->value;
7706   if (bound)
7707     {
7708       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7709       tree var_type = TREE_TYPE (var);
7710       gimple_seq stmts;
7711
7712       if (dump_file && (dump_flags & TDF_DETAILS))
7713         {
7714           fprintf (dump_file, "Replacing exit test: ");
7715           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7716         }
7717       compare = cp->comp;
7718       bound = unshare_expr (fold_convert (var_type, bound));
7719       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7720       if (stmts)
7721         gsi_insert_seq_on_edge_immediate (
7722                 loop_preheader_edge (data->current_loop),
7723                 stmts);
7724
7725       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7726       gimple_cond_set_lhs (cond_stmt, var);
7727       gimple_cond_set_code (cond_stmt, compare);
7728       gimple_cond_set_rhs (cond_stmt, op);
7729       return;
7730     }
7731
7732   /* The induction variable elimination failed; just express the original
7733      giv.  */
7734   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7735   gcc_assert (comp != NULL_TREE);
7736   gcc_assert (use->op_p != NULL);
7737   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7738                                          SSA_NAME_VAR (*use->op_p),
7739                                          true, GSI_SAME_STMT);
7740 }
7741
7742 /* Rewrite the groups using the selected induction variables.  */
7743
7744 static void
7745 rewrite_groups (struct ivopts_data *data)
7746 {
7747   unsigned i, j;
7748
7749   for (i = 0; i < data->vgroups.length (); i++)
7750     {
7751       struct iv_group *group = data->vgroups[i];
7752       struct iv_cand *cand = group->selected;
7753
7754       gcc_assert (cand);
7755
7756       if (group->type == USE_NONLINEAR_EXPR)
7757         {
7758           for (j = 0; j < group->vuses.length (); j++)
7759             {
7760               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7761               update_stmt (group->vuses[j]->stmt);
7762             }
7763         }
7764       else if (address_p (group->type))
7765         {
7766           for (j = 0; j < group->vuses.length (); j++)
7767             {
7768               rewrite_use_address (data, group->vuses[j], cand);
7769               update_stmt (group->vuses[j]->stmt);
7770             }
7771         }
7772       else
7773         {
7774           gcc_assert (group->type == USE_COMPARE);
7775
7776           for (j = 0; j < group->vuses.length (); j++)
7777             {
7778               rewrite_use_compare (data, group->vuses[j], cand);
7779               update_stmt (group->vuses[j]->stmt);
7780             }
7781         }
7782     }
7783 }
7784
7785 /* Removes the ivs that are not used after rewriting.  */
7786
7787 static void
7788 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7789 {
7790   unsigned j;
7791   bitmap_iterator bi;
7792
7793   /* Figure out an order in which to release SSA DEFs so that we don't
7794      release something that we'd have to propagate into a debug stmt
7795      afterwards.  */
7796   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7797     {
7798       struct version_info *info;
7799
7800       info = ver_info (data, j);
7801       if (info->iv
7802           && !integer_zerop (info->iv->step)
7803           && !info->inv_id
7804           && !info->iv->nonlin_use
7805           && !info->preserve_biv)
7806         {
7807           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7808
7809           tree def = info->iv->ssa_name;
7810
7811           if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7812             {
7813               imm_use_iterator imm_iter;
7814               use_operand_p use_p;
7815               gimple *stmt;
7816               int count = 0;
7817
7818               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7819                 {
7820                   if (!gimple_debug_bind_p (stmt))
7821                     continue;
7822
7823                   /* We just want to determine whether to do nothing
7824                      (count == 0), to substitute the computed
7825                      expression into a single use of the SSA DEF by
7826                      itself (count == 1), or to use a debug temp
7827                      because the SSA DEF is used multiple times or as
7828                      part of a larger expression (count > 1). */
7829                   count++;
7830                   if (gimple_debug_bind_get_value (stmt) != def)
7831                     count++;
7832
7833                   if (count > 1)
7834                     break;
7835                 }
7836
7837               if (!count)
7838                 continue;
7839
7840               struct iv_use dummy_use;
7841               struct iv_cand *best_cand = NULL, *cand;
7842               unsigned i, best_pref = 0, cand_pref;
7843               tree comp = NULL_TREE;
7844
7845               memset (&dummy_use, 0, sizeof (dummy_use));
7846               dummy_use.iv = info->iv;
7847               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7848                 {
7849                   cand = data->vgroups[i]->selected;
7850                   if (cand == best_cand)
7851                     continue;
7852                   cand_pref = operand_equal_p (cand->iv->step,
7853                                                info->iv->step, 0)
7854                     ? 4 : 0;
7855                   cand_pref
7856                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7857                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7858                     ? 2 : 0;
7859                   cand_pref
7860                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7861                     ? 1 : 0;
7862                   if (best_cand == NULL || best_pref < cand_pref)
7863                     {
7864                       tree this_comp
7865                         = get_debug_computation_at (data->current_loop,
7866                                                     SSA_NAME_DEF_STMT (def),
7867                                                     &dummy_use, cand);
7868                       if (this_comp)
7869                         {
7870                           best_cand = cand;
7871                           best_pref = cand_pref;
7872                           comp = this_comp;
7873                         }
7874                     }
7875                 }
7876
7877               if (!best_cand)
7878                 continue;
7879
7880               comp = unshare_expr (comp);
7881               if (count > 1)
7882                 {
7883                   tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7884                   /* FIXME: Is setting the mode really necessary? */
7885                   if (SSA_NAME_VAR (def))
7886                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7887                   else
7888                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7889                   gdebug *def_temp
7890                     = gimple_build_debug_bind (vexpr, comp, NULL);
7891                   gimple_stmt_iterator gsi;
7892
7893                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7894                     gsi = gsi_after_labels (gimple_bb
7895                                             (SSA_NAME_DEF_STMT (def)));
7896                   else
7897                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7898
7899                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7900                   comp = vexpr;
7901                 }
7902
7903               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7904                 {
7905                   if (!gimple_debug_bind_p (stmt))
7906                     continue;
7907
7908                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7909                     SET_USE (use_p, comp);
7910
7911                   update_stmt (stmt);
7912                 }
7913             }
7914         }
7915     }
7916 }
7917
7918 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7919    for hash_map::traverse.  */
7920
7921 bool
7922 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7923 {
7924   if (value)
7925     {
7926       value->~tree_niter_desc ();
7927       free (value);
7928     }
7929   return true;
7930 }
7931
7932 /* Frees data allocated by the optimization of a single loop.  */
7933
7934 static void
7935 free_loop_data (struct ivopts_data *data)
7936 {
7937   unsigned i, j;
7938   bitmap_iterator bi;
7939   tree obj;
7940
7941   if (data->niters)
7942     {
7943       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7944       delete data->niters;
7945       data->niters = NULL;
7946     }
7947
7948   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7949     {
7950       struct version_info *info;
7951
7952       info = ver_info (data, i);
7953       info->iv = NULL;
7954       info->has_nonlin_use = false;
7955       info->preserve_biv = false;
7956       info->inv_id = 0;
7957     }
7958   bitmap_clear (data->relevant);
7959   bitmap_clear (data->important_candidates);
7960
7961   for (i = 0; i < data->vgroups.length (); i++)
7962     {
7963       struct iv_group *group = data->vgroups[i];
7964
7965       for (j = 0; j < group->vuses.length (); j++)
7966         free (group->vuses[j]);
7967       group->vuses.release ();
7968
7969       BITMAP_FREE (group->related_cands);
7970       for (j = 0; j < group->n_map_members; j++)
7971         {
7972           if (group->cost_map[j].inv_vars)
7973             BITMAP_FREE (group->cost_map[j].inv_vars);
7974           if (group->cost_map[j].inv_exprs)
7975             BITMAP_FREE (group->cost_map[j].inv_exprs);
7976         }
7977
7978       free (group->cost_map);
7979       free (group);
7980     }
7981   data->vgroups.truncate (0);
7982
7983   for (i = 0; i < data->vcands.length (); i++)
7984     {
7985       struct iv_cand *cand = data->vcands[i];
7986
7987       if (cand->inv_vars)
7988         BITMAP_FREE (cand->inv_vars);
7989       if (cand->inv_exprs)
7990         BITMAP_FREE (cand->inv_exprs);
7991       free (cand);
7992     }
7993   data->vcands.truncate (0);
7994
7995   if (data->version_info_size < num_ssa_names)
7996     {
7997       data->version_info_size = 2 * num_ssa_names;
7998       free (data->version_info);
7999       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8000     }
8001
8002   data->max_inv_var_id = 0;
8003   data->max_inv_expr_id = 0;
8004
8005   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
8006     SET_DECL_RTL (obj, NULL_RTX);
8007
8008   decl_rtl_to_reset.truncate (0);
8009
8010   data->inv_expr_tab->empty ();
8011
8012   data->iv_common_cand_tab->empty ();
8013   data->iv_common_cands.truncate (0);
8014 }
8015
8016 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
8017    loop tree.  */
8018
8019 static void
8020 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8021 {
8022   free_loop_data (data);
8023   free (data->version_info);
8024   BITMAP_FREE (data->relevant);
8025   BITMAP_FREE (data->important_candidates);
8026
8027   decl_rtl_to_reset.release ();
8028   data->vgroups.release ();
8029   data->vcands.release ();
8030   delete data->inv_expr_tab;
8031   data->inv_expr_tab = NULL;
8032   free_affine_expand_cache (&data->name_expansion_cache);
8033   if (data->base_object_map)
8034     delete data->base_object_map;
8035   delete data->iv_common_cand_tab;
8036   data->iv_common_cand_tab = NULL;
8037   data->iv_common_cands.release ();
8038   obstack_free (&data->iv_obstack, NULL);
8039 }
8040
8041 /* Returns true if the loop body BODY includes any function calls.  */
8042
8043 static bool
8044 loop_body_includes_call (basic_block *body, unsigned num_nodes)
8045 {
8046   gimple_stmt_iterator gsi;
8047   unsigned i;
8048
8049   for (i = 0; i < num_nodes; i++)
8050     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8051       {
8052         gimple *stmt = gsi_stmt (gsi);
8053         if (is_gimple_call (stmt)
8054             && !gimple_call_internal_p (stmt)
8055             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8056           return true;
8057       }
8058   return false;
8059 }
8060
8061 /* Determine cost scaling factor for basic blocks in loop.  */
8062 #define COST_SCALING_FACTOR_BOUND (20)
8063
8064 static void
8065 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8066 {
8067   int lfreq = data->current_loop->header->count.to_frequency (cfun);
8068   if (!data->speed || lfreq <= 0)
8069     return;
8070
8071   int max_freq = lfreq;
8072   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8073     {
8074       body[i]->aux = (void *)(intptr_t) 1;
8075       if (max_freq < body[i]->count.to_frequency (cfun))
8076         max_freq = body[i]->count.to_frequency (cfun);
8077     }
8078   if (max_freq > lfreq)
8079     {
8080       int divisor, factor;
8081       /* Check if scaling factor itself needs to be scaled by the bound.  This
8082          is to avoid overflow when scaling cost according to profile info.  */
8083       if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8084         {
8085           divisor = max_freq;
8086           factor = COST_SCALING_FACTOR_BOUND;
8087         }
8088       else
8089         {
8090           divisor = lfreq;
8091           factor = 1;
8092         }
8093       for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8094         {
8095           int bfreq = body[i]->count.to_frequency (cfun);
8096           if (bfreq <= lfreq)
8097             continue;
8098
8099           body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8100         }
8101     }
8102 }
8103
8104 /* Find doloop comparison use and set its doloop_p on if found.  */
8105
8106 static bool
8107 find_doloop_use (struct ivopts_data *data)
8108 {
8109   struct loop *loop = data->current_loop;
8110
8111   for (unsigned i = 0; i < data->vgroups.length (); i++)
8112     {
8113       struct iv_group *group = data->vgroups[i];
8114       if (group->type == USE_COMPARE)
8115         {
8116           gcc_assert (group->vuses.length () == 1);
8117           struct iv_use *use = group->vuses[0];
8118           gimple *stmt = use->stmt;
8119           if (gimple_code (stmt) == GIMPLE_COND)
8120             {
8121               basic_block bb = gimple_bb (stmt);
8122               edge true_edge, false_edge;
8123               extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8124               /* This comparison is used for loop latch.  Require latch is empty
8125                  for now.  */
8126               if ((loop->latch == true_edge->dest
8127                    || loop->latch == false_edge->dest)
8128                   && empty_block_p (loop->latch))
8129                 {
8130                   group->doloop_p = true;
8131                   if (dump_file && (dump_flags & TDF_DETAILS))
8132                     {
8133                       fprintf (dump_file, "Doloop cmp iv use: ");
8134                       print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8135                     }
8136                   return true;
8137                 }
8138             }
8139         }
8140     }
8141
8142   return false;
8143 }
8144
8145 /* For the targets which support doloop, to predict whether later RTL doloop
8146    transformation will perform on this loop, further detect the doloop use and
8147    mark the flag doloop_use_p if predicted.  */
8148
8149 void
8150 analyze_and_mark_doloop_use (struct ivopts_data *data)
8151 {
8152   data->doloop_use_p = false;
8153
8154   if (!flag_branch_on_count_reg)
8155     return;
8156
8157   if (data->current_loop->unroll == USHRT_MAX)
8158     return;
8159
8160   if (!generic_predict_doloop_p (data))
8161     return;
8162
8163   if (find_doloop_use (data))
8164     {
8165       data->doloop_use_p = true;
8166       if (dump_file && (dump_flags & TDF_DETAILS))
8167         {
8168           struct loop *loop = data->current_loop;
8169           fprintf (dump_file,
8170                    "Predict loop %d can perform"
8171                    " doloop optimization later.\n",
8172                    loop->num);
8173           flow_loop_dump (loop, dump_file, NULL, 1);
8174         }
8175     }
8176 }
8177
8178 /* Optimizes the LOOP.  Returns true if anything changed.  */
8179
8180 static bool
8181 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8182                            bitmap toremove)
8183 {
8184   bool changed = false;
8185   class iv_ca *iv_ca;
8186   edge exit = single_dom_exit (loop);
8187   basic_block *body;
8188
8189   gcc_assert (!data->niters);
8190   data->current_loop = loop;
8191   data->loop_loc = find_loop_location (loop).get_location_t ();
8192   data->speed = optimize_loop_for_speed_p (loop);
8193
8194   if (dump_file && (dump_flags & TDF_DETAILS))
8195     {
8196       fprintf (dump_file, "Processing loop %d", loop->num);
8197       if (data->loop_loc != UNKNOWN_LOCATION)
8198         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8199                  LOCATION_LINE (data->loop_loc));
8200       fprintf (dump_file, "\n");
8201
8202       if (exit)
8203         {
8204           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
8205                    exit->src->index, exit->dest->index);
8206           print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8207                              0, TDF_SLIM);
8208           fprintf (dump_file, "\n");
8209         }
8210
8211       fprintf (dump_file, "\n");
8212     }
8213
8214   body = get_loop_body (loop);
8215   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8216   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8217
8218   data->loop_single_exit_p
8219     = exit != NULL && loop_only_exit_p (loop, body, exit);
8220
8221   /* For each ssa name determines whether it behaves as an induction variable
8222      in some loop.  */
8223   if (!find_induction_variables (data, body))
8224     goto finish;
8225
8226   /* Finds interesting uses (item 1).  */
8227   find_interesting_uses (data, body);
8228   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8229     goto finish;
8230
8231   /* Determine cost scaling factor for basic blocks in loop.  */
8232   determine_scaling_factor (data, body);
8233
8234   /* Analyze doloop possibility and mark the doloop use if predicted.  */
8235   analyze_and_mark_doloop_use (data);
8236
8237   /* Finds candidates for the induction variables (item 2).  */
8238   find_iv_candidates (data);
8239
8240   /* Calculates the costs (item 3, part 1).  */
8241   determine_iv_costs (data);
8242   determine_group_iv_costs (data);
8243   determine_set_costs (data);
8244
8245   /* Find the optimal set of induction variables (item 3, part 2).  */
8246   iv_ca = find_optimal_iv_set (data);
8247   /* Cleanup basic block aux field.  */
8248   for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8249     body[i]->aux = NULL;
8250   if (!iv_ca)
8251     goto finish;
8252   changed = true;
8253
8254   /* Create the new induction variables (item 4, part 1).  */
8255   create_new_ivs (data, iv_ca);
8256   iv_ca_free (&iv_ca);
8257
8258   /* Rewrite the uses (item 4, part 2).  */
8259   rewrite_groups (data);
8260
8261   /* Remove the ivs that are unused after rewriting.  */
8262   remove_unused_ivs (data, toremove);
8263
8264 finish:
8265   free (body);
8266   free_loop_data (data);
8267
8268   return changed;
8269 }
8270
8271 /* Main entry point.  Optimizes induction variables in loops.  */
8272
8273 void
8274 tree_ssa_iv_optimize (void)
8275 {
8276   struct ivopts_data data;
8277   auto_bitmap toremove;
8278
8279   tree_ssa_iv_optimize_init (&data);
8280   mark_ssa_maybe_undefs ();
8281
8282   /* Optimize the loops starting with the innermost ones.  */
8283   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8284     {
8285       if (!dbg_cnt (ivopts_loop))
8286         continue;
8287
8288       if (dump_file && (dump_flags & TDF_DETAILS))
8289         flow_loop_dump (loop, dump_file, NULL, 1);
8290
8291       tree_ssa_iv_optimize_loop (&data, loop, toremove);
8292     }
8293
8294   /* Remove eliminated IV defs.  */
8295   release_defs_bitset (toremove);
8296
8297   /* We have changed the structure of induction variables; it might happen
8298      that definitions in the scev database refer to some of them that were
8299      eliminated.  */
8300   scev_reset_htab ();
8301   /* Likewise niter and control-IV information.  */
8302   free_numbers_of_iterations_estimates (cfun);
8303
8304   tree_ssa_iv_optimize_finalize (&data);
8305 }
8306
8307 #include "gt-tree-ssa-loop-ivopts.h"