gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
 24 coalescing and induction variable elimination.  It proceeds in the
 25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
 33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
 35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
 43 3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
 49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
 64 All of this is done loop by loop.  Doing it globally is theoretically
 65 possible; it might give better performance and it might enable us
 66 to decide costs more precisely, but getting all the interactions right
 67 would be complicated.
 69 For targets supporting low-overhead loops, IVOPTs has to take care of
 70 the loops which will probably be transformed by the RTL doloop optimization,
 71 and try to make the selected IV candidate set optimal.  The process of doloop
 72 support includes:
 74 1) Analyze whether the current loop will be transformed into a doloop, find
 75 and mark its compare type IV use as a doloop use (iv_group field doloop_p),
 76 and set the flag doloop_use_p of ivopts_data to notify subsequent doloop
 77 processing.  See analyze_and_mark_doloop_use and its callees for the details.
 78 The target hook predict_doloop_p can be used for target specific checks.
 80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
 81 set the flag doloop_p of the iv_cand; its step cost is set to zero and,
 82 like a biv, it has no extra cost.  For cost determination between the doloop
 83 IV cand and an IV use, the target hooks doloop_cost_for_generic and
 84 doloop_cost_for_address are provided to add extra costs for generic type
 85 and address type IV uses.  Zero cost is assigned to the pair of the doloop
 86 IV cand and the doloop IV use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will process as usual, pick up doloop dedicated IV if
90 profitable. */
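/* As an illustrative sketch (not part of the pass itself), the steps above
   applied to a simple C loop, assuming 'a' is an array of ints and a pointer
   candidate happens to be the cheapest choice, look roughly like this:

     before ivopts:                      after ivopts (one possible outcome):
       for (i = 0; i < n; i++)             for (p = &a[0]; p != &a[n]; p++)
         a[i] = 0;                           *p = 0;

   The address use 'a[i]' and the compare use 'i < n' both select the pointer
   candidate 'p': the multiplication hidden in 'a[i]' is strength reduced to a
   pointer increment, the exit test is rewritten against the bound '&a[n]',
   and the original counter 'i' is removed as dead code.  Whether this is the
   set actually chosen depends on the target cost model described above.  */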
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
135 /* FIXME: Expressions are expanded to RTL in this pass to determine the
136 cost of different addressing modes. This should be moved to a TBD
137 interface between the GIMPLE and RTL worlds. */
139 /* The infinite cost. */
140 #define INFTY 1000000000
142 /* Returns the expected number of loop iterations for LOOP.
143 The average trip count is computed from profile data if it
144 exists. */
146 static inline HOST_WIDE_INT
147 avg_loop_niter (class loop *loop)
149 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
150 if (niter == -1)
152 niter = likely_max_stmt_executions_int (loop);
154 if (niter == -1 || niter > param_avg_loop_niter)
155 return param_avg_loop_niter;
158 return niter;
161 struct iv_use;
163 /* Representation of the induction variable. */
164 struct iv
166 tree base; /* Initial value of the iv. */
 167 tree base_object; /* A memory object to which the induction variable points.  */
168 tree step; /* Step of the iv (constant only). */
169 tree ssa_name; /* The ssa name with the value. */
 170 struct iv_use *nonlin_use; /* The nonlinear use of this iv, if any.  */
171 bool biv_p; /* Is it a biv? */
172 bool no_overflow; /* True if the iv doesn't overflow. */
 173 bool have_address_use;/* For a biv, whether it is used in any address
174 type use. */
177 /* Per-ssa version information (induction variable descriptions, etc.). */
178 struct version_info
180 tree name; /* The ssa name. */
181 struct iv *iv; /* Induction variable description. */
182 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
183 an expression that is not an induction variable. */
184 bool preserve_biv; /* For the original biv, whether to preserve it. */
185 unsigned inv_id; /* Id of an invariant. */
188 /* Types of uses. */
189 enum use_type
191 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
192 USE_REF_ADDRESS, /* Use is an address for an explicit memory
193 reference. */
194 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
195 cases where the expansion of the function
196 will turn the argument into a normal address. */
197 USE_COMPARE /* Use is a compare. */
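/* A few hypothetical examples of the categories above (not taken from this
   file): in 'x = *p' the address 'p' is a USE_REF_ADDRESS use; a pointer
   passed to a builtin such as '__builtin_memset (p, 0, len)' can become a
   USE_PTR_ADDRESS use when the call is expanded inline; in 'if (i < n)' the
   operand 'i' is a USE_COMPARE use; and in '*q = i' the stored value 'i' is
   a USE_NONLINEAR_EXPR (generic) use.  */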
200 /* Cost of a computation. */
201 class comp_cost
203 public:
204 comp_cost (): cost (0), complexity (0), scratch (0)
207 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
208 : cost (cost), complexity (complexity), scratch (scratch)
211 /* Returns true if COST is infinite. */
212 bool infinite_cost_p ();
214 /* Adds costs COST1 and COST2. */
215 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
217 /* Adds COST to the comp_cost. */
218 comp_cost operator+= (comp_cost cost);
220 /* Adds constant C to this comp_cost. */
221 comp_cost operator+= (HOST_WIDE_INT c);
 223 /* Subtracts constant C from this comp_cost.  */
224 comp_cost operator-= (HOST_WIDE_INT c);
226 /* Divide the comp_cost by constant C. */
227 comp_cost operator/= (HOST_WIDE_INT c);
229 /* Multiply the comp_cost by constant C. */
230 comp_cost operator*= (HOST_WIDE_INT c);
 232 /* Subtracts cost COST2 from COST1.  */
233 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
235 /* Subtracts COST from this comp_cost. */
236 comp_cost operator-= (comp_cost cost);
238 /* Returns true if COST1 is smaller than COST2. */
239 friend bool operator< (comp_cost cost1, comp_cost cost2);
241 /* Returns true if COST1 and COST2 are equal. */
242 friend bool operator== (comp_cost cost1, comp_cost cost2);
 244 /* Returns true if COST1 is smaller than or equal to COST2.  */
245 friend bool operator<= (comp_cost cost1, comp_cost cost2);
247 int64_t cost; /* The runtime cost. */
248 unsigned complexity; /* The estimate of the complexity of the code for
249 the computation (in no concrete units --
250 complexity field should be larger for more
251 complex expressions and addressing modes). */
252 int64_t scratch; /* Scratch used during cost computation. */
255 static const comp_cost no_cost;
256 static const comp_cost infinite_cost (INFTY, 0, INFTY);
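/* A small worked example of the cost algebra defined below (the numbers are
   made up): comp_cost (4, 1) + comp_cost (2, 0) yields comp_cost (6, 1);
   adding anything to infinite_cost stays infinite; and when two costs have
   equal runtime cost, operator< falls back to the complexity field, so
   comp_cost (6, 1) < comp_cost (6, 2).  */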
258 bool
259 comp_cost::infinite_cost_p ()
261 return cost == INFTY;
264 comp_cost
265 operator+ (comp_cost cost1, comp_cost cost2)
267 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
268 return infinite_cost;
270 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
271 cost1.cost += cost2.cost;
272 cost1.complexity += cost2.complexity;
274 return cost1;
277 comp_cost
278 operator- (comp_cost cost1, comp_cost cost2)
280 if (cost1.infinite_cost_p ())
281 return infinite_cost;
283 gcc_assert (!cost2.infinite_cost_p ());
284 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
286 cost1.cost -= cost2.cost;
287 cost1.complexity -= cost2.complexity;
289 return cost1;
292 comp_cost
293 comp_cost::operator+= (comp_cost cost)
295 *this = *this + cost;
296 return *this;
299 comp_cost
300 comp_cost::operator+= (HOST_WIDE_INT c)
302 if (c >= INFTY)
303 this->cost = INFTY;
305 if (infinite_cost_p ())
306 return *this;
308 gcc_assert (this->cost + c < infinite_cost.cost);
309 this->cost += c;
311 return *this;
314 comp_cost
315 comp_cost::operator-= (HOST_WIDE_INT c)
317 if (infinite_cost_p ())
318 return *this;
320 gcc_assert (this->cost - c < infinite_cost.cost);
321 this->cost -= c;
323 return *this;
326 comp_cost
327 comp_cost::operator/= (HOST_WIDE_INT c)
329 gcc_assert (c != 0);
330 if (infinite_cost_p ())
331 return *this;
333 this->cost /= c;
335 return *this;
338 comp_cost
339 comp_cost::operator*= (HOST_WIDE_INT c)
341 if (infinite_cost_p ())
342 return *this;
344 gcc_assert (this->cost * c < infinite_cost.cost);
345 this->cost *= c;
347 return *this;
350 comp_cost
351 comp_cost::operator-= (comp_cost cost)
353 *this = *this - cost;
354 return *this;
357 bool
358 operator< (comp_cost cost1, comp_cost cost2)
360 if (cost1.cost == cost2.cost)
361 return cost1.complexity < cost2.complexity;
363 return cost1.cost < cost2.cost;
366 bool
367 operator== (comp_cost cost1, comp_cost cost2)
369 return cost1.cost == cost2.cost
370 && cost1.complexity == cost2.complexity;
373 bool
374 operator<= (comp_cost cost1, comp_cost cost2)
376 return cost1 < cost2 || cost1 == cost2;
379 struct iv_inv_expr_ent;
381 /* The candidate - cost pair. */
382 class cost_pair
384 public:
385 struct iv_cand *cand; /* The candidate. */
386 comp_cost cost; /* The cost. */
387 enum tree_code comp; /* For iv elimination, the comparison. */
388 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
389 preserved when representing iv_use with iv_cand. */
390 bitmap inv_exprs; /* The list of newly created invariant expressions
391 when representing iv_use with iv_cand. */
392 tree value; /* For final value elimination, the expression for
393 the final value of the iv. For iv elimination,
394 the new bound to compare with. */
397 /* Use. */
398 struct iv_use
400 unsigned id; /* The id of the use. */
401 unsigned group_id; /* The group id the use belongs to. */
402 enum use_type type; /* Type of the use. */
403 tree mem_type; /* The memory type to use when testing whether an
404 address is legitimate, and what the address's
405 cost is. */
406 struct iv *iv; /* The induction variable it is based on. */
407 gimple *stmt; /* Statement in that it occurs. */
408 tree *op_p; /* The place where it occurs. */
410 tree addr_base; /* Base address with const offset stripped. */
411 poly_uint64_pod addr_offset;
412 /* Const offset stripped from base address. */
415 /* Group of uses. */
416 struct iv_group
418 /* The id of the group. */
419 unsigned id;
420 /* Uses of the group are of the same type. */
421 enum use_type type;
422 /* The set of "related" IV candidates, plus the important ones. */
423 bitmap related_cands;
424 /* Number of IV candidates in the cost_map. */
425 unsigned n_map_members;
 426 /* The costs w.r.t. the iv candidates.  */
427 class cost_pair *cost_map;
428 /* The selected candidate for the group. */
429 struct iv_cand *selected;
430 /* To indicate this is a doloop use group. */
431 bool doloop_p;
432 /* Uses in the group. */
433 vec<struct iv_use *> vuses;
436 /* The position where the iv is computed. */
437 enum iv_position
439 IP_NORMAL, /* At the end, just before the exit condition. */
440 IP_END, /* At the end of the latch block. */
441 IP_BEFORE_USE, /* Immediately before a specific use. */
442 IP_AFTER_USE, /* Immediately after a specific use. */
443 IP_ORIGINAL /* The original biv. */
446 /* The induction variable candidate. */
447 struct iv_cand
449 unsigned id; /* The number of the candidate. */
450 bool important; /* Whether this is an "important" candidate, i.e. such
451 that it should be considered by all uses. */
452 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
453 gimple *incremented_at;/* For original biv, the statement where it is
454 incremented. */
455 tree var_before; /* The variable used for it before increment. */
456 tree var_after; /* The variable used for it after increment. */
457 struct iv *iv; /* The value of the candidate. NULL for
458 "pseudocandidate" used to indicate the possibility
459 to replace the final value of an iv by direct
460 computation of the value. */
461 unsigned cost; /* Cost of the candidate. */
462 unsigned cost_step; /* Cost of the candidate's increment operation. */
463 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
464 where it is incremented. */
465 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
466 iv_cand. */
467 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
 468 handle it as a new invariant expression which will
 469 be hoisted out of the loop.  */
470 struct iv *orig_iv; /* The original iv if this cand is added from biv with
471 smaller type. */
472 bool doloop_p; /* Whether this is a doloop candidate. */
475 /* Hashtable entry for common candidate derived from iv uses. */
476 class iv_common_cand
478 public:
479 tree base;
480 tree step;
481 /* IV uses from which this common candidate is derived. */
482 auto_vec<struct iv_use *> uses;
483 hashval_t hash;
486 /* Hashtable helpers. */
488 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
490 static inline hashval_t hash (const iv_common_cand *);
491 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
494 /* Hash function for possible common candidates. */
496 inline hashval_t
497 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
499 return ccand->hash;
502 /* Hash table equality function for common candidates. */
504 inline bool
505 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
506 const iv_common_cand *ccand2)
508 return (ccand1->hash == ccand2->hash
509 && operand_equal_p (ccand1->base, ccand2->base, 0)
510 && operand_equal_p (ccand1->step, ccand2->step, 0)
511 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
512 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
515 /* Loop invariant expression hashtable entry. */
517 struct iv_inv_expr_ent
519 /* Tree expression of the entry. */
520 tree expr;
 521 /* Unique identifier.  */
522 int id;
523 /* Hash value. */
524 hashval_t hash;
527 /* Sort iv_inv_expr_ent pair A and B by id field. */
529 static int
530 sort_iv_inv_expr_ent (const void *a, const void *b)
532 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
533 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
535 unsigned id1 = (*e1)->id;
536 unsigned id2 = (*e2)->id;
538 if (id1 < id2)
539 return -1;
540 else if (id1 > id2)
541 return 1;
542 else
543 return 0;
546 /* Hashtable helpers. */
548 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
550 static inline hashval_t hash (const iv_inv_expr_ent *);
551 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
554 /* Return true if uses of type TYPE represent some form of address. */
556 inline bool
557 address_p (use_type type)
559 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
562 /* Hash function for loop invariant expressions. */
564 inline hashval_t
565 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
567 return expr->hash;
570 /* Hash table equality function for expressions. */
572 inline bool
573 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
574 const iv_inv_expr_ent *expr2)
576 return expr1->hash == expr2->hash
577 && operand_equal_p (expr1->expr, expr2->expr, 0);
580 struct ivopts_data
582 /* The currently optimized loop. */
583 class loop *current_loop;
584 location_t loop_loc;
586 /* Numbers of iterations for all exits of the current loop. */
587 hash_map<edge, tree_niter_desc *> *niters;
589 /* Number of registers used in it. */
590 unsigned regs_used;
592 /* The size of version_info array allocated. */
593 unsigned version_info_size;
595 /* The array of information for the ssa names. */
596 struct version_info *version_info;
598 /* The hashtable of loop invariant expressions created
599 by ivopt. */
600 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
602 /* The bitmap of indices in version_info whose value was changed. */
603 bitmap relevant;
605 /* The uses of induction variables. */
606 vec<iv_group *> vgroups;
608 /* The candidates. */
609 vec<iv_cand *> vcands;
611 /* A bitmap of important candidates. */
612 bitmap important_candidates;
614 /* Cache used by tree_to_aff_combination_expand. */
615 hash_map<tree, name_expansion *> *name_expansion_cache;
617 /* The hashtable of common candidates derived from iv uses. */
618 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
620 /* The common candidates. */
621 vec<iv_common_cand *> iv_common_cands;
623 /* Hash map recording base object information of tree exp. */
624 hash_map<tree, tree> *base_object_map;
626 /* The maximum invariant variable id. */
627 unsigned max_inv_var_id;
629 /* The maximum invariant expression id. */
630 unsigned max_inv_expr_id;
632 /* Number of no_overflow BIVs which are not used in memory address. */
633 unsigned bivs_not_used_in_addr;
635 /* Obstack for iv structure. */
636 struct obstack iv_obstack;
638 /* Whether to consider just related and important candidates when replacing a
639 use. */
640 bool consider_all_candidates;
642 /* Are we optimizing for speed? */
643 bool speed;
645 /* Whether the loop body includes any function calls. */
646 bool body_includes_call;
648 /* Whether the loop body can only be exited via single exit. */
649 bool loop_single_exit_p;
651 /* Whether the loop has doloop comparison use. */
652 bool doloop_use_p;
655 /* An assignment of iv candidates to uses. */
657 class iv_ca
659 public:
660 /* The number of uses covered by the assignment. */
661 unsigned upto;
663 /* Number of uses that cannot be expressed by the candidates in the set. */
664 unsigned bad_groups;
666 /* Candidate assigned to a use, together with the related costs. */
667 class cost_pair **cand_for_group;
669 /* Number of times each candidate is used. */
670 unsigned *n_cand_uses;
672 /* The candidates used. */
673 bitmap cands;
675 /* The number of candidates in the set. */
676 unsigned n_cands;
 678 /* The number of invariants needed, including both invariant variables and
679 invariant expressions. */
680 unsigned n_invs;
682 /* Total cost of expressing uses. */
683 comp_cost cand_use_cost;
685 /* Total cost of candidates. */
686 int64_t cand_cost;
688 /* Number of times each invariant variable is used. */
689 unsigned *n_inv_var_uses;
691 /* Number of times each invariant expression is used. */
692 unsigned *n_inv_expr_uses;
694 /* Total cost of the assignment. */
695 comp_cost cost;
698 /* Difference of two iv candidate assignments. */
700 struct iv_ca_delta
702 /* Changed group. */
703 struct iv_group *group;
705 /* An old assignment (for rollback purposes). */
706 class cost_pair *old_cp;
708 /* A new assignment. */
709 class cost_pair *new_cp;
711 /* Next change in the list. */
712 struct iv_ca_delta *next;
 715 /* Bound on number of candidates below which all candidates are considered.  */
717 #define CONSIDER_ALL_CANDIDATES_BOUND \
718 ((unsigned) param_iv_consider_all_candidates_bound)
720 /* If there are more iv occurrences, we just give up (it is quite unlikely that
721 optimizing such a loop would help, and it would take ages). */
723 #define MAX_CONSIDERED_GROUPS \
724 ((unsigned) param_iv_max_considered_uses)
 726 /* If there are at most this number of ivs in the set, always try removing
 727 unnecessary ivs from the set.  */
729 #define ALWAYS_PRUNE_CAND_SET_BOUND \
730 ((unsigned) param_iv_always_prune_cand_set_bound)
 732 /* The list of trees for which the decl_rtl field must be reset is stored
733 here. */
735 static vec<tree> decl_rtl_to_reset;
737 static comp_cost force_expr_to_var_cost (tree, bool);
739 /* The single loop exit if it dominates the latch, NULL otherwise. */
741 edge
742 single_dom_exit (class loop *loop)
744 edge exit = single_exit (loop);
746 if (!exit)
747 return NULL;
749 if (!just_once_each_iteration_p (loop, exit->src))
750 return NULL;
752 return exit;
755 /* Dumps information about the induction variable IV to FILE. Don't dump
756 variable's name if DUMP_NAME is FALSE. The information is dumped with
757 preceding spaces indicated by INDENT_LEVEL. */
759 void
760 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
762 const char *p;
763 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
765 if (indent_level > 4)
766 indent_level = 4;
767 p = spaces + 8 - (indent_level << 1);
769 fprintf (file, "%sIV struct:\n", p);
770 if (iv->ssa_name && dump_name)
772 fprintf (file, "%s SSA_NAME:\t", p);
773 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
774 fprintf (file, "\n");
777 fprintf (file, "%s Type:\t", p);
778 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
779 fprintf (file, "\n");
781 fprintf (file, "%s Base:\t", p);
782 print_generic_expr (file, iv->base, TDF_SLIM);
783 fprintf (file, "\n");
785 fprintf (file, "%s Step:\t", p);
786 print_generic_expr (file, iv->step, TDF_SLIM);
787 fprintf (file, "\n");
789 if (iv->base_object)
791 fprintf (file, "%s Object:\t", p);
792 print_generic_expr (file, iv->base_object, TDF_SLIM);
793 fprintf (file, "\n");
796 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
798 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
799 p, iv->no_overflow ? "No-overflow" : "Overflow");
802 /* Dumps information about the USE to FILE. */
804 void
805 dump_use (FILE *file, struct iv_use *use)
807 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
808 fprintf (file, " At stmt:\t");
809 print_gimple_stmt (file, use->stmt, 0);
810 fprintf (file, " At pos:\t");
811 if (use->op_p)
812 print_generic_expr (file, *use->op_p, TDF_SLIM);
813 fprintf (file, "\n");
814 dump_iv (file, use->iv, false, 2);
817 /* Dumps information about the uses to FILE. */
819 void
820 dump_groups (FILE *file, struct ivopts_data *data)
822 unsigned i, j;
823 struct iv_group *group;
825 for (i = 0; i < data->vgroups.length (); i++)
827 group = data->vgroups[i];
828 fprintf (file, "Group %d:\n", group->id);
829 if (group->type == USE_NONLINEAR_EXPR)
830 fprintf (file, " Type:\tGENERIC\n");
831 else if (group->type == USE_REF_ADDRESS)
832 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
833 else if (group->type == USE_PTR_ADDRESS)
834 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
835 else
837 gcc_assert (group->type == USE_COMPARE);
838 fprintf (file, " Type:\tCOMPARE\n");
840 for (j = 0; j < group->vuses.length (); j++)
841 dump_use (file, group->vuses[j]);
845 /* Dumps information about induction variable candidate CAND to FILE. */
847 void
848 dump_cand (FILE *file, struct iv_cand *cand)
850 struct iv *iv = cand->iv;
852 fprintf (file, "Candidate %d:\n", cand->id);
853 if (cand->inv_vars)
855 fprintf (file, " Depend on inv.vars: ");
856 dump_bitmap (file, cand->inv_vars);
858 if (cand->inv_exprs)
860 fprintf (file, " Depend on inv.exprs: ");
861 dump_bitmap (file, cand->inv_exprs);
864 if (cand->var_before)
866 fprintf (file, " Var befor: ");
867 print_generic_expr (file, cand->var_before, TDF_SLIM);
868 fprintf (file, "\n");
870 if (cand->var_after)
872 fprintf (file, " Var after: ");
873 print_generic_expr (file, cand->var_after, TDF_SLIM);
874 fprintf (file, "\n");
877 switch (cand->pos)
879 case IP_NORMAL:
880 fprintf (file, " Incr POS: before exit test\n");
881 break;
883 case IP_BEFORE_USE:
884 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
885 break;
887 case IP_AFTER_USE:
888 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
889 break;
891 case IP_END:
892 fprintf (file, " Incr POS: at end\n");
893 break;
895 case IP_ORIGINAL:
896 fprintf (file, " Incr POS: orig biv\n");
897 break;
900 dump_iv (file, iv, false, 1);
903 /* Returns the info for ssa version VER. */
905 static inline struct version_info *
906 ver_info (struct ivopts_data *data, unsigned ver)
908 return data->version_info + ver;
911 /* Returns the info for ssa name NAME. */
913 static inline struct version_info *
914 name_info (struct ivopts_data *data, tree name)
916 return ver_info (data, SSA_NAME_VERSION (name));
919 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
920 emitted in LOOP. */
922 static bool
923 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
925 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
927 gcc_assert (bb);
929 if (sbb == loop->latch)
930 return true;
932 if (sbb != bb)
933 return false;
935 return stmt == last_stmt (bb);
 938 /* Returns true if STMT is after the place where the original induction
939 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
940 if the positions are identical. */
942 static bool
943 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
945 basic_block cand_bb = gimple_bb (cand->incremented_at);
946 basic_block stmt_bb = gimple_bb (stmt);
948 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
949 return false;
951 if (stmt_bb != cand_bb)
952 return true;
954 if (true_if_equal
955 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
956 return true;
957 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 960 /* Returns true if STMT is after the place where the induction variable
961 CAND is incremented in LOOP. */
963 static bool
964 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
966 switch (cand->pos)
968 case IP_END:
969 return false;
971 case IP_NORMAL:
972 return stmt_after_ip_normal_pos (loop, stmt);
974 case IP_ORIGINAL:
975 case IP_AFTER_USE:
976 return stmt_after_inc_pos (cand, stmt, false);
978 case IP_BEFORE_USE:
979 return stmt_after_inc_pos (cand, stmt, true);
981 default:
982 gcc_unreachable ();
986 /* walk_tree callback for contains_abnormal_ssa_name_p. */
988 static tree
989 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
991 if (TREE_CODE (*tp) == SSA_NAME
992 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
993 return *tp;
995 if (!EXPR_P (*tp))
996 *walk_subtrees = 0;
998 return NULL_TREE;
1001 /* Returns true if EXPR contains a ssa name that occurs in an
1002 abnormal phi node. */
1004 bool
1005 contains_abnormal_ssa_name_p (tree expr)
1007 return walk_tree_without_duplicates
1008 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1011 /* Returns the structure describing number of iterations determined from
1012 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1014 static class tree_niter_desc *
1015 niter_for_exit (struct ivopts_data *data, edge exit)
1017 class tree_niter_desc *desc;
1018 tree_niter_desc **slot;
1020 if (!data->niters)
1022 data->niters = new hash_map<edge, tree_niter_desc *>;
1023 slot = NULL;
1025 else
1026 slot = data->niters->get (exit);
1028 if (!slot)
1030 /* Try to determine number of iterations. We cannot safely work with ssa
1031 names that appear in phi nodes on abnormal edges, so that we do not
1032 create overlapping life ranges for them (PR 27283). */
1033 desc = XNEW (class tree_niter_desc);
1034 if (!number_of_iterations_exit (data->current_loop,
1035 exit, desc, true)
1036 || contains_abnormal_ssa_name_p (desc->niter))
1038 XDELETE (desc);
1039 desc = NULL;
1041 data->niters->put (exit, desc);
1043 else
1044 desc = *slot;
1046 return desc;
1049 /* Returns the structure describing number of iterations determined from
1050 single dominating exit of DATA->current_loop, or NULL if something
1051 goes wrong. */
1053 static class tree_niter_desc *
1054 niter_for_single_dom_exit (struct ivopts_data *data)
1056 edge exit = single_dom_exit (data->current_loop);
1058 if (!exit)
1059 return NULL;
1061 return niter_for_exit (data, exit);
1064 /* Initializes data structures used by the iv optimization pass, stored
1065 in DATA. */
1067 static void
1068 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1070 data->version_info_size = 2 * num_ssa_names;
1071 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1072 data->relevant = BITMAP_ALLOC (NULL);
1073 data->important_candidates = BITMAP_ALLOC (NULL);
1074 data->max_inv_var_id = 0;
1075 data->max_inv_expr_id = 0;
1076 data->niters = NULL;
1077 data->vgroups.create (20);
1078 data->vcands.create (20);
1079 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1080 data->name_expansion_cache = NULL;
1081 data->base_object_map = NULL;
1082 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1083 data->iv_common_cands.create (20);
1084 decl_rtl_to_reset.create (20);
1085 gcc_obstack_init (&data->iv_obstack);
1088 /* walk_tree callback for determine_base_object. */
1090 static tree
1091 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1093 tree_code code = TREE_CODE (*tp);
1094 tree obj = NULL_TREE;
1095 if (code == ADDR_EXPR)
1097 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1098 if (!base)
1099 obj = *tp;
1100 else if (TREE_CODE (base) != MEM_REF)
1101 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1103 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1104 obj = fold_convert (ptr_type_node, *tp);
1106 if (!obj)
1108 if (!EXPR_P (*tp))
1109 *walk_subtrees = 0;
1111 return NULL_TREE;
1113 /* Record special node for multiple base objects and stop. */
1114 if (*static_cast<tree *> (wdata))
1116 *static_cast<tree *> (wdata) = integer_zero_node;
1117 return integer_zero_node;
1119 /* Record the base object and continue looking. */
1120 *static_cast<tree *> (wdata) = obj;
1121 return NULL_TREE;
 1124 /* Returns a memory object to which EXPR points, with caching.  Return NULL
 1125 if we are able to determine that it does not point to any such object;
 1126 specially return integer_zero_node if EXPR contains multiple base objects.  */
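/* Informally, and using C-level syntax rather than GIMPLE: for '&a[i] + 4'
   the base object is '&a'; for an expression based on a pointer SSA name,
   such as 'p_1 + i_2 * 4', it is 'p_1'; for a plain integer expression like
   'i_2 * 4' there is no base object and NULL is returned; and an expression
   mentioning more than one such object yields integer_zero_node.  */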
1128 static tree
1129 determine_base_object (struct ivopts_data *data, tree expr)
1131 tree *slot, obj = NULL_TREE;
1132 if (data->base_object_map)
1134 if ((slot = data->base_object_map->get(expr)) != NULL)
1135 return *slot;
1137 else
1138 data->base_object_map = new hash_map<tree, tree>;
1140 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1141 data->base_object_map->put (expr, obj);
1142 return obj;
1145 /* Return true if address expression with non-DECL_P operand appears
1146 in EXPR. */
1148 static bool
1149 contain_complex_addr_expr (tree expr)
1151 bool res = false;
1153 STRIP_NOPS (expr);
1154 switch (TREE_CODE (expr))
1156 case POINTER_PLUS_EXPR:
1157 case PLUS_EXPR:
1158 case MINUS_EXPR:
1159 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1160 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1161 break;
1163 case ADDR_EXPR:
1164 return (!DECL_P (TREE_OPERAND (expr, 0)));
1166 default:
1167 return false;
1170 return res;
1173 /* Allocates an induction variable with given initial value BASE and step STEP
1174 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1176 static struct iv *
1177 alloc_iv (struct ivopts_data *data, tree base, tree step,
1178 bool no_overflow = false)
1180 tree expr = base;
1181 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1182 sizeof (struct iv));
1183 gcc_assert (step != NULL_TREE);
1185 /* Lower address expression in base except ones with DECL_P as operand.
1186 By doing this:
1187 1) More accurate cost can be computed for address expressions;
1188 2) Duplicate candidates won't be created for bases in different
1189 forms, like &a[0] and &a. */
1190 STRIP_NOPS (expr);
1191 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1192 || contain_complex_addr_expr (expr))
1194 aff_tree comb;
1195 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1196 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1199 iv->base = base;
1200 iv->base_object = determine_base_object (data, base);
1201 iv->step = step;
1202 iv->biv_p = false;
1203 iv->nonlin_use = NULL;
1204 iv->ssa_name = NULL_TREE;
1205 if (!no_overflow
1206 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1207 base, step))
1208 no_overflow = true;
1209 iv->no_overflow = no_overflow;
1210 iv->have_address_use = false;
1212 return iv;
1215 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1216 doesn't overflow. */
1218 static void
1219 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1220 bool no_overflow)
1222 struct version_info *info = name_info (data, iv);
1224 gcc_assert (!info->iv);
1226 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1227 info->iv = alloc_iv (data, base, step, no_overflow);
1228 info->iv->ssa_name = iv;
1231 /* Finds induction variable declaration for VAR. */
1233 static struct iv *
1234 get_iv (struct ivopts_data *data, tree var)
1236 basic_block bb;
1237 tree type = TREE_TYPE (var);
1239 if (!POINTER_TYPE_P (type)
1240 && !INTEGRAL_TYPE_P (type))
1241 return NULL;
1243 if (!name_info (data, var)->iv)
1245 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1247 if (!bb
1248 || !flow_bb_inside_loop_p (data->current_loop, bb))
1250 if (POINTER_TYPE_P (type))
1251 type = sizetype;
1252 set_iv (data, var, var, build_int_cst (type, 0), true);
1256 return name_info (data, var)->iv;
1259 /* Return the first non-invariant ssa var found in EXPR. */
1261 static tree
1262 extract_single_var_from_expr (tree expr)
1264 int i, n;
1265 tree tmp;
1266 enum tree_code code;
1268 if (!expr || is_gimple_min_invariant (expr))
1269 return NULL;
1271 code = TREE_CODE (expr);
1272 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1274 n = TREE_OPERAND_LENGTH (expr);
1275 for (i = 0; i < n; i++)
1277 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1279 if (tmp)
1280 return tmp;
1283 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1286 /* Finds basic ivs. */
1288 static bool
1289 find_bivs (struct ivopts_data *data)
1291 gphi *phi;
1292 affine_iv iv;
1293 tree step, type, base, stop;
1294 bool found = false;
1295 class loop *loop = data->current_loop;
1296 gphi_iterator psi;
1298 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1300 phi = psi.phi ();
1302 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1303 continue;
1305 if (virtual_operand_p (PHI_RESULT (phi)))
1306 continue;
1308 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1309 continue;
1311 if (integer_zerop (iv.step))
1312 continue;
1314 step = iv.step;
1315 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1316 /* Stop expanding iv base at the first ssa var referred by iv step.
 1317 Ideally we should stop at any ssa var, but since that is expensive
 1318 and such cases rarely happen, we just do it for the first one.
1320 See PR64705 for the rationale. */
1321 stop = extract_single_var_from_expr (step);
1322 base = expand_simple_operations (base, stop);
1323 if (contains_abnormal_ssa_name_p (base)
1324 || contains_abnormal_ssa_name_p (step))
1325 continue;
1327 type = TREE_TYPE (PHI_RESULT (phi));
1328 base = fold_convert (type, base);
1329 if (step)
1331 if (POINTER_TYPE_P (type))
1332 step = convert_to_ptrofftype (step);
1333 else
1334 step = fold_convert (type, step);
1337 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1338 found = true;
1341 return found;
1344 /* Marks basic ivs. */
1346 static void
1347 mark_bivs (struct ivopts_data *data)
1349 gphi *phi;
1350 gimple *def;
1351 tree var;
1352 struct iv *iv, *incr_iv;
1353 class loop *loop = data->current_loop;
1354 basic_block incr_bb;
1355 gphi_iterator psi;
1357 data->bivs_not_used_in_addr = 0;
1358 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1360 phi = psi.phi ();
1362 iv = get_iv (data, PHI_RESULT (phi));
1363 if (!iv)
1364 continue;
1366 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1367 def = SSA_NAME_DEF_STMT (var);
 1368 /* Don't mark an iv peeled from another one as a biv.  */
1369 if (def
1370 && gimple_code (def) == GIMPLE_PHI
1371 && gimple_bb (def) == loop->header)
1372 continue;
1374 incr_iv = get_iv (data, var);
1375 if (!incr_iv)
1376 continue;
1378 /* If the increment is in the subloop, ignore it. */
1379 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1380 if (incr_bb->loop_father != data->current_loop
1381 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1382 continue;
1384 iv->biv_p = true;
1385 incr_iv->biv_p = true;
1386 if (iv->no_overflow)
1387 data->bivs_not_used_in_addr++;
1388 if (incr_iv->no_overflow)
1389 data->bivs_not_used_in_addr++;
1393 /* Checks whether STMT defines a linear induction variable and stores its
1394 parameters to IV. */
1396 static bool
1397 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1399 tree lhs, stop;
1400 class loop *loop = data->current_loop;
1402 iv->base = NULL_TREE;
1403 iv->step = NULL_TREE;
1405 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1406 return false;
1408 lhs = gimple_assign_lhs (stmt);
1409 if (TREE_CODE (lhs) != SSA_NAME)
1410 return false;
1412 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1413 return false;
1415 /* Stop expanding iv base at the first ssa var referred by iv step.
 1416 Ideally we should stop at any ssa var, but since that is expensive
 1417 and such cases rarely happen, we just do it for the first one.
1419 See PR64705 for the rationale. */
1420 stop = extract_single_var_from_expr (iv->step);
1421 iv->base = expand_simple_operations (iv->base, stop);
1422 if (contains_abnormal_ssa_name_p (iv->base)
1423 || contains_abnormal_ssa_name_p (iv->step))
1424 return false;
1426 /* If STMT could throw, then do not consider STMT as defining a GIV.
1427 While this will suppress optimizations, we cannot safely delete this
1428 GIV and associated statements, even if it appears it is not used. */
1429 if (stmt_could_throw_p (cfun, stmt))
1430 return false;
1432 return true;
1435 /* Finds general ivs in statement STMT. */
1437 static void
1438 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1440 affine_iv iv;
1442 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1443 return;
1445 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1448 /* Finds general ivs in basic block BB. */
1450 static void
1451 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1453 gimple_stmt_iterator bsi;
1455 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1456 find_givs_in_stmt (data, gsi_stmt (bsi));
1459 /* Finds general ivs. */
1461 static void
1462 find_givs (struct ivopts_data *data)
1464 class loop *loop = data->current_loop;
1465 basic_block *body = get_loop_body_in_dom_order (loop);
1466 unsigned i;
1468 for (i = 0; i < loop->num_nodes; i++)
1469 find_givs_in_bb (data, body[i]);
1470 free (body);
 1473 /* For each ssa name defined in LOOP, determines whether it is an induction
1474 variable and if so, its initial value and step. */
1476 static bool
1477 find_induction_variables (struct ivopts_data *data)
1479 unsigned i;
1480 bitmap_iterator bi;
1482 if (!find_bivs (data))
1483 return false;
1485 find_givs (data);
1486 mark_bivs (data);
1488 if (dump_file && (dump_flags & TDF_DETAILS))
1490 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1492 if (niter)
1494 fprintf (dump_file, " number of iterations ");
1495 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1496 if (!integer_zerop (niter->may_be_zero))
1498 fprintf (dump_file, "; zero if ");
1499 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1501 fprintf (dump_file, "\n");
1504 fprintf (dump_file, "\n<Induction Vars>:\n");
1505 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1507 struct version_info *info = ver_info (data, i);
1508 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1509 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1513 return true;
1516 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1517 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1518 is the const offset stripped from IV base and MEM_TYPE is the type
1519 of the memory being addressed. For uses of other types, ADDR_BASE
1520 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1522 static struct iv_use *
1523 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1524 gimple *stmt, enum use_type type, tree mem_type,
1525 tree addr_base, poly_uint64 addr_offset)
1527 struct iv_use *use = XCNEW (struct iv_use);
1529 use->id = group->vuses.length ();
1530 use->group_id = group->id;
1531 use->type = type;
1532 use->mem_type = mem_type;
1533 use->iv = iv;
1534 use->stmt = stmt;
1535 use->op_p = use_p;
1536 use->addr_base = addr_base;
1537 use->addr_offset = addr_offset;
1539 group->vuses.safe_push (use);
1540 return use;
1543 /* Checks whether OP is a loop-level invariant and if so, records it.
1544 NONLINEAR_USE is true if the invariant is used in a way we do not
1545 handle specially. */
1547 static void
1548 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1550 basic_block bb;
1551 struct version_info *info;
1553 if (TREE_CODE (op) != SSA_NAME
1554 || virtual_operand_p (op))
1555 return;
1557 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1558 if (bb
1559 && flow_bb_inside_loop_p (data->current_loop, bb))
1560 return;
1562 info = name_info (data, op);
1563 info->name = op;
1564 info->has_nonlin_use |= nonlinear_use;
1565 if (!info->inv_id)
1566 info->inv_id = ++data->max_inv_var_id;
1567 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1570 /* Record a group of TYPE. */
1572 static struct iv_group *
1573 record_group (struct ivopts_data *data, enum use_type type)
1575 struct iv_group *group = XCNEW (struct iv_group);
1577 group->id = data->vgroups.length ();
1578 group->type = type;
1579 group->related_cands = BITMAP_ALLOC (NULL);
1580 group->vuses.create (1);
1581 group->doloop_p = false;
1583 data->vgroups.safe_push (group);
1584 return group;
1587 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1588 New group will be created if there is no existing group for the use.
1589 MEM_TYPE is the type of memory being addressed, or NULL if this
1590 isn't an address reference. */
1592 static struct iv_use *
1593 record_group_use (struct ivopts_data *data, tree *use_p,
1594 struct iv *iv, gimple *stmt, enum use_type type,
1595 tree mem_type)
1597 tree addr_base = NULL;
1598 struct iv_group *group = NULL;
1599 poly_uint64 addr_offset = 0;
1601 /* Record non address type use in a new group. */
1602 if (address_p (type))
1604 unsigned int i;
1606 addr_base = strip_offset (iv->base, &addr_offset);
1607 for (i = 0; i < data->vgroups.length (); i++)
1609 struct iv_use *use;
1611 group = data->vgroups[i];
1612 use = group->vuses[0];
1613 if (!address_p (use->type))
1614 continue;
1616 /* Check if it has the same stripped base and step. */
1617 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1618 && operand_equal_p (iv->step, use->iv->step, 0)
1619 && operand_equal_p (addr_base, use->addr_base, 0))
1620 break;
1622 if (i == data->vgroups.length ())
1623 group = NULL;
1626 if (!group)
1627 group = record_group (data, type);
1629 return record_use (group, use_p, iv, stmt, type, mem_type,
1630 addr_base, addr_offset);
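/* An illustration of the grouping done above (hypothetical uses): address
   uses whose iv bases are '&a[i]' and '&a[i + 4]' share the same base object,
   the same step and the same stripped base, so they land in one group whose
   members differ only in addr_offset, while a use based on '&b[i]' has a
   different base object and therefore starts a group of its own.  */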
1633 /* Checks whether the use OP is interesting and if so, records it. */
1635 static struct iv_use *
1636 find_interesting_uses_op (struct ivopts_data *data, tree op)
1638 struct iv *iv;
1639 gimple *stmt;
1640 struct iv_use *use;
1642 if (TREE_CODE (op) != SSA_NAME)
1643 return NULL;
1645 iv = get_iv (data, op);
1646 if (!iv)
1647 return NULL;
1649 if (iv->nonlin_use)
1651 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1652 return iv->nonlin_use;
1655 if (integer_zerop (iv->step))
1657 record_invariant (data, op, true);
1658 return NULL;
1661 stmt = SSA_NAME_DEF_STMT (op);
1662 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1664 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1665 iv->nonlin_use = use;
1666 return use;
1669 /* Indicate how compare type iv_use can be handled. */
1670 enum comp_iv_rewrite
1672 COMP_IV_NA,
1673 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1674 COMP_IV_EXPR,
1675 /* We may rewrite compare type iv_uses on both sides of comparison by
1676 expressing value of each iv_use. */
1677 COMP_IV_EXPR_2,
1678 /* We may rewrite compare type iv_use by expressing value of the iv_use
1679 or by eliminating it with other iv_cand. */
1680 COMP_IV_ELIM
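/* Informal examples of the cases above: in 'if (i < n)' where 'i' is an IV
   and 'n' is loop invariant, the use may be rewritten or eliminated
   (COMP_IV_ELIM); in 'if (p != q)' where both operands are IVs, each side can
   only be re-expressed (COMP_IV_EXPR_2); and a comparison with no IV operand
   is of no interest here (COMP_IV_NA).  */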
1683 /* Given a condition in statement STMT, checks whether it is a compare
1684 of an induction variable and an invariant. If this is the case,
1685 CONTROL_VAR is set to location of the iv, BOUND to the location of
1686 the invariant, IV_VAR and IV_BOUND are set to the corresponding
 1687 induction variable descriptions, and the kind of possible rewrite is
 1688 returned.  If this is not the case, CONTROL_VAR and BOUND are set to the
 1689 arguments of the condition and COMP_IV_NA is returned.  */
1691 static enum comp_iv_rewrite
1692 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1693 tree **control_var, tree **bound,
1694 struct iv **iv_var, struct iv **iv_bound)
1696 /* The objects returned when COND has constant operands. */
1697 static struct iv const_iv;
1698 static tree zero;
1699 tree *op0 = &zero, *op1 = &zero;
1700 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1701 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1703 if (gimple_code (stmt) == GIMPLE_COND)
1705 gcond *cond_stmt = as_a <gcond *> (stmt);
1706 op0 = gimple_cond_lhs_ptr (cond_stmt);
1707 op1 = gimple_cond_rhs_ptr (cond_stmt);
1709 else
1711 op0 = gimple_assign_rhs1_ptr (stmt);
1712 op1 = gimple_assign_rhs2_ptr (stmt);
1715 zero = integer_zero_node;
1716 const_iv.step = integer_zero_node;
1718 if (TREE_CODE (*op0) == SSA_NAME)
1719 iv0 = get_iv (data, *op0);
1720 if (TREE_CODE (*op1) == SSA_NAME)
1721 iv1 = get_iv (data, *op1);
 1723 /* If both sides of the comparison are IVs, we can express ivs on both ends.  */
1724 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1726 rewrite_type = COMP_IV_EXPR_2;
1727 goto end;
 1730 /* If neither side of the comparison is an IV.  */
1731 if ((!iv0 || integer_zerop (iv0->step))
1732 && (!iv1 || integer_zerop (iv1->step)))
1733 goto end;
1735 /* Control variable may be on the other side. */
1736 if (!iv0 || integer_zerop (iv0->step))
1738 std::swap (op0, op1);
1739 std::swap (iv0, iv1);
1741 /* If one side is IV and the other side isn't loop invariant. */
1742 if (!iv1)
1743 rewrite_type = COMP_IV_EXPR;
1744 /* If one side is IV and the other side is loop invariant. */
1745 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1746 rewrite_type = COMP_IV_ELIM;
1748 end:
1749 if (control_var)
1750 *control_var = op0;
1751 if (iv_var)
1752 *iv_var = iv0;
1753 if (bound)
1754 *bound = op1;
1755 if (iv_bound)
1756 *iv_bound = iv1;
1758 return rewrite_type;
1761 /* Checks whether the condition in STMT is interesting and if so,
1762 records it. */
1764 static void
1765 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1767 tree *var_p, *bound_p;
1768 struct iv *var_iv, *bound_iv;
1769 enum comp_iv_rewrite ret;
1771 ret = extract_cond_operands (data, stmt,
1772 &var_p, &bound_p, &var_iv, &bound_iv);
1773 if (ret == COMP_IV_NA)
1775 find_interesting_uses_op (data, *var_p);
1776 find_interesting_uses_op (data, *bound_p);
1777 return;
1780 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1781 /* Record compare type iv_use for iv on the other side of comparison. */
1782 if (ret == COMP_IV_EXPR_2)
1783 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1786 /* Returns the outermost loop EXPR is obviously invariant in
1787 relative to the loop LOOP, i.e. if all its operands are defined
1788 outside of the returned loop. Returns NULL if EXPR is not
1789 even obviously invariant in LOOP. */
1791 class loop *
1792 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1794 basic_block def_bb;
1795 unsigned i, len;
1797 if (is_gimple_min_invariant (expr))
1798 return current_loops->tree_root;
1800 if (TREE_CODE (expr) == SSA_NAME)
1802 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1803 if (def_bb)
1805 if (flow_bb_inside_loop_p (loop, def_bb))
1806 return NULL;
1807 return superloop_at_depth (loop,
1808 loop_depth (def_bb->loop_father) + 1);
1811 return current_loops->tree_root;
1814 if (!EXPR_P (expr))
1815 return NULL;
1817 unsigned maxdepth = 0;
1818 len = TREE_OPERAND_LENGTH (expr);
1819 for (i = 0; i < len; i++)
1821 class loop *ivloop;
1822 if (!TREE_OPERAND (expr, i))
1823 continue;
1825 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1826 if (!ivloop)
1827 return NULL;
1828 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1831 return superloop_at_depth (loop, maxdepth);
1834 /* Returns true if expression EXPR is obviously invariant in LOOP,
1835 i.e. if all its operands are defined outside of the LOOP. LOOP
1836 should not be the function body. */
1838 bool
1839 expr_invariant_in_loop_p (class loop *loop, tree expr)
1841 basic_block def_bb;
1842 unsigned i, len;
1844 gcc_assert (loop_depth (loop) > 0);
1846 if (is_gimple_min_invariant (expr))
1847 return true;
1849 if (TREE_CODE (expr) == SSA_NAME)
1851 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1852 if (def_bb
1853 && flow_bb_inside_loop_p (loop, def_bb))
1854 return false;
1856 return true;
1859 if (!EXPR_P (expr))
1860 return false;
1862 len = TREE_OPERAND_LENGTH (expr);
1863 for (i = 0; i < len; i++)
1864 if (TREE_OPERAND (expr, i)
1865 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1866 return false;
1868 return true;
1871 /* Given expression EXPR which computes inductive values with respect
1872 to loop recorded in DATA, this function returns biv from which EXPR
1873 is derived by tracing definition chains of ssa variables in EXPR. */
1875 static struct iv*
1876 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1878 struct iv *iv;
1879 unsigned i, n;
1880 tree e2, e1;
1881 enum tree_code code;
1882 gimple *stmt;
1884 if (expr == NULL_TREE)
1885 return NULL;
1887 if (is_gimple_min_invariant (expr))
1888 return NULL;
1890 code = TREE_CODE (expr);
1891 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1893 n = TREE_OPERAND_LENGTH (expr);
1894 for (i = 0; i < n; i++)
1896 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1897 if (iv)
1898 return iv;
1902 /* Stop if it's not ssa name. */
1903 if (code != SSA_NAME)
1904 return NULL;
1906 iv = get_iv (data, expr);
1907 if (!iv || integer_zerop (iv->step))
1908 return NULL;
1909 else if (iv->biv_p)
1910 return iv;
1912 stmt = SSA_NAME_DEF_STMT (expr);
1913 if (gphi *phi = dyn_cast <gphi *> (stmt))
1915 ssa_op_iter iter;
1916 use_operand_p use_p;
1917 basic_block phi_bb = gimple_bb (phi);
1919 /* Skip loop header PHI that doesn't define biv. */
1920 if (phi_bb->loop_father == data->current_loop)
1921 return NULL;
1923 if (virtual_operand_p (gimple_phi_result (phi)))
1924 return NULL;
1926 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1928 tree use = USE_FROM_PTR (use_p);
1929 iv = find_deriving_biv_for_expr (data, use);
1930 if (iv)
1931 return iv;
1933 return NULL;
1935 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1936 return NULL;
1938 e1 = gimple_assign_rhs1 (stmt);
1939 code = gimple_assign_rhs_code (stmt);
1940 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1941 return find_deriving_biv_for_expr (data, e1);
1943 switch (code)
1945 case MULT_EXPR:
1946 case PLUS_EXPR:
1947 case MINUS_EXPR:
1948 case POINTER_PLUS_EXPR:
1949 /* Increments, decrements and multiplications by a constant
1950 are simple. */
1951 e2 = gimple_assign_rhs2 (stmt);
1952 iv = find_deriving_biv_for_expr (data, e2);
1953 if (iv)
1954 return iv;
1955 gcc_fallthrough ();
1957 CASE_CONVERT:
1958 /* Casts are simple. */
1959 return find_deriving_biv_for_expr (data, e1);
1961 default:
1962 break;
1965 return NULL;
 1968 /* Record that BIV, its predecessor and its successor are used in
1969 address type uses. */
1971 static void
1972 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1974 unsigned i;
1975 tree type, base_1, base_2;
1976 bitmap_iterator bi;
1978 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1979 || biv->have_address_use || !biv->no_overflow)
1980 return;
1982 type = TREE_TYPE (biv->base);
1983 if (!INTEGRAL_TYPE_P (type))
1984 return;
1986 biv->have_address_use = true;
1987 data->bivs_not_used_in_addr--;
1988 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1989 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1991 struct iv *iv = ver_info (data, i)->iv;
1993 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1994 || iv->have_address_use || !iv->no_overflow)
1995 continue;
1997 if (type != TREE_TYPE (iv->base)
1998 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1999 continue;
2001 if (!operand_equal_p (biv->step, iv->step, 0))
2002 continue;
2004 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2005 if (operand_equal_p (base_1, iv->base, 0)
2006 || operand_equal_p (base_2, biv->base, 0))
2008 iv->have_address_use = true;
2009 data->bivs_not_used_in_addr--;
 2014 /* Accumulates the steps of indices into DATA and replaces their values with the
2015 initial ones. Returns false when the value of the index cannot be determined.
2016 Callback for for_each_index. */
2018 struct ifs_ivopts_data
2020 struct ivopts_data *ivopts_data;
2021 gimple *stmt;
2022 tree step;
2025 static bool
2026 idx_find_step (tree base, tree *idx, void *data)
2028 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2029 struct iv *iv;
2030 bool use_overflow_semantics = false;
2031 tree step, iv_base, iv_step, lbound, off;
2032 class loop *loop = dta->ivopts_data->current_loop;
2034 /* If base is a component ref, require that the offset of the reference
2035 be invariant. */
2036 if (TREE_CODE (base) == COMPONENT_REF)
2038 off = component_ref_field_offset (base);
2039 return expr_invariant_in_loop_p (loop, off);
2042 /* If base is array, first check whether we will be able to move the
2043 reference out of the loop (in order to take its address in strength
2044 reduction). In order for this to work we need both lower bound
2045 and step to be loop invariants. */
2046 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2048 /* Moreover, for a range, the size needs to be invariant as well. */
2049 if (TREE_CODE (base) == ARRAY_RANGE_REF
2050 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2051 return false;
2053 step = array_ref_element_size (base);
2054 lbound = array_ref_low_bound (base);
2056 if (!expr_invariant_in_loop_p (loop, step)
2057 || !expr_invariant_in_loop_p (loop, lbound))
2058 return false;
2061 if (TREE_CODE (*idx) != SSA_NAME)
2062 return true;
2064 iv = get_iv (dta->ivopts_data, *idx);
2065 if (!iv)
2066 return false;
2068 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2069 *&x[0], which is not folded and does not trigger the
2070 ARRAY_REF path below. */
2071 *idx = iv->base;
2073 if (integer_zerop (iv->step))
2074 return true;
2076 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2078 step = array_ref_element_size (base);
2080 /* We only handle addresses whose step is an integer constant. */
2081 if (TREE_CODE (step) != INTEGER_CST)
2082 return false;
2084 else
2085 /* The step for pointer arithmetic is already 1 byte. */
2086 step = size_one_node;
2088 iv_base = iv->base;
2089 iv_step = iv->step;
2090 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2091 use_overflow_semantics = true;
2093 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2094 sizetype, &iv_base, &iv_step, dta->stmt,
2095 use_overflow_semantics))
2097 /* The index might wrap. */
2098 return false;
2101 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2102 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2104 if (dta->ivopts_data->bivs_not_used_in_addr)
2106 if (!iv->biv_p)
2107 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2109 record_biv_for_address_use (dta->ivopts_data, iv);
2111 return true;
2114 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2115 object is passed to it in DATA. */
2117 static bool
2118 idx_record_use (tree base, tree *idx,
2119 void *vdata)
2121 struct ivopts_data *data = (struct ivopts_data *) vdata;
2122 find_interesting_uses_op (data, *idx);
2123 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2125 find_interesting_uses_op (data, array_ref_element_size (base));
2126 find_interesting_uses_op (data, array_ref_low_bound (base));
2128 return true;
2131 /* If we can prove that TOP = cst * BOT for some constant cst,
2132 store cst to MUL and return true. Otherwise return false.
2133 The returned value is always sign-extended, regardless of the
2134 signedness of TOP and BOT. */
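/* For instance, with TOP = i * 4 and BOT = i this stores 4 to MUL; with
   TOP = 12 and BOT = 4 it stores 3; with TOP = 13 and BOT = 4 the division
   leaves a remainder, so false is returned. */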
2136 static bool
2137 constant_multiple_of (tree top, tree bot, widest_int *mul)
2139 tree mby;
2140 enum tree_code code;
2141 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2142 widest_int res, p0, p1;
2144 STRIP_NOPS (top);
2145 STRIP_NOPS (bot);
2147 if (operand_equal_p (top, bot, 0))
2149 *mul = 1;
2150 return true;
2153 code = TREE_CODE (top);
2154 switch (code)
2156 case MULT_EXPR:
2157 mby = TREE_OPERAND (top, 1);
2158 if (TREE_CODE (mby) != INTEGER_CST)
2159 return false;
2161 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2162 return false;
2164 *mul = wi::sext (res * wi::to_widest (mby), precision);
2165 return true;
2167 case PLUS_EXPR:
2168 case MINUS_EXPR:
2169 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2170 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2171 return false;
2173 if (code == MINUS_EXPR)
2174 p1 = -p1;
2175 *mul = wi::sext (p0 + p1, precision);
2176 return true;
2178 case INTEGER_CST:
2179 if (TREE_CODE (bot) != INTEGER_CST)
2180 return false;
2182 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2183 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2184 if (p1 == 0)
2185 return false;
2186 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2187 return res == 0;
2189 default:
2190 if (POLY_INT_CST_P (top)
2191 && POLY_INT_CST_P (bot)
2192 && constant_multiple_p (wi::to_poly_widest (top),
2193 wi::to_poly_widest (bot), mul))
2194 return true;
2196 return false;
2200 /* Return true if memory reference REF with step STEP may be unaligned. */
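/* For instance, a 4-byte access whose address advances by 2 bytes per
   iteration may end up only 2-byte aligned even if the first access is
   4-byte aligned, which is why the trailing zeros of STEP are checked
   below. */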
2202 static bool
2203 may_be_unaligned_p (tree ref, tree step)
2205 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2206 thus they are not misaligned. */
2207 if (TREE_CODE (ref) == TARGET_MEM_REF)
2208 return false;
2210 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2211 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2212 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2214 unsigned HOST_WIDE_INT bitpos;
2215 unsigned int ref_align;
2216 get_object_alignment_1 (ref, &ref_align, &bitpos);
2217 if (ref_align < align
2218 || (bitpos % align) != 0
2219 || (bitpos % BITS_PER_UNIT) != 0)
2220 return true;
2222 unsigned int trailing_zeros = tree_ctz (step);
2223 if (trailing_zeros < HOST_BITS_PER_INT
2224 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2225 return true;
2227 return false;
2230 /* Return true if EXPR may be non-addressable. */
2232 bool
2233 may_be_nonaddressable_p (tree expr)
2235 switch (TREE_CODE (expr))
2237 case VAR_DECL:
2238 /* Check if it's a register variable. */
2239 return DECL_HARD_REGISTER (expr);
2241 case TARGET_MEM_REF:
2242 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2243 target, thus they are always addressable. */
2244 return false;
2246 case MEM_REF:
2247 /* Likewise for MEM_REFs, modulo the storage order. */
2248 return REF_REVERSE_STORAGE_ORDER (expr);
2250 case BIT_FIELD_REF:
2251 if (REF_REVERSE_STORAGE_ORDER (expr))
2252 return true;
2253 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2255 case COMPONENT_REF:
2256 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2257 return true;
2258 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2259 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2261 case ARRAY_REF:
2262 case ARRAY_RANGE_REF:
2263 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2264 return true;
2265 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2267 case VIEW_CONVERT_EXPR:
2268 /* This kind of view-conversion may wrap non-addressable objects
2269 and make them look addressable. After some processing the
2270 non-addressability may be uncovered again, causing ADDR_EXPRs
2271 of inappropriate objects to be built. */
2272 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2273 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2274 return true;
2275 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2277 CASE_CONVERT:
2278 return true;
2280 default:
2281 break;
2284 return false;
2287 /* Finds addresses in *OP_P inside STMT. */
2289 static void
2290 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2291 tree *op_p)
2293 tree base = *op_p, step = size_zero_node;
2294 struct iv *civ;
2295 struct ifs_ivopts_data ifs_ivopts_data;
2297 /* Do not play with volatile memory references. A bit too conservative,
2298 perhaps, but safe. */
2299 if (gimple_has_volatile_ops (stmt))
2300 goto fail;
2302 /* Ignore bitfields for now. Not really something terribly complicated
2303 to handle. TODO. */
2304 if (TREE_CODE (base) == BIT_FIELD_REF)
2305 goto fail;
2307 base = unshare_expr (base);
2309 if (TREE_CODE (base) == TARGET_MEM_REF)
2311 tree type = build_pointer_type (TREE_TYPE (base));
2312 tree astep;
2314 if (TMR_BASE (base)
2315 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2317 civ = get_iv (data, TMR_BASE (base));
2318 if (!civ)
2319 goto fail;
2321 TMR_BASE (base) = civ->base;
2322 step = civ->step;
2324 if (TMR_INDEX2 (base)
2325 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2327 civ = get_iv (data, TMR_INDEX2 (base));
2328 if (!civ)
2329 goto fail;
2331 TMR_INDEX2 (base) = civ->base;
2332 step = civ->step;
2334 if (TMR_INDEX (base)
2335 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2337 civ = get_iv (data, TMR_INDEX (base));
2338 if (!civ)
2339 goto fail;
2341 TMR_INDEX (base) = civ->base;
2342 astep = civ->step;
2344 if (astep)
2346 if (TMR_STEP (base))
2347 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2349 step = fold_build2 (PLUS_EXPR, type, step, astep);
2353 if (integer_zerop (step))
2354 goto fail;
2355 base = tree_mem_ref_addr (type, base);
2357 else
2359 ifs_ivopts_data.ivopts_data = data;
2360 ifs_ivopts_data.stmt = stmt;
2361 ifs_ivopts_data.step = size_zero_node;
2362 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2363 || integer_zerop (ifs_ivopts_data.step))
2364 goto fail;
2365 step = ifs_ivopts_data.step;
2367 /* Check that the base expression is addressable. This needs
2368 to be done after substituting bases of IVs into it. */
2369 if (may_be_nonaddressable_p (base))
2370 goto fail;
2372 /* Moreover, on strict alignment platforms, check that it is
2373 sufficiently aligned. */
2374 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2375 goto fail;
2377 base = build_fold_addr_expr (base);
2379 /* Substituting bases of IVs into the base expression might
2380 have caused folding opportunities. */
2381 if (TREE_CODE (base) == ADDR_EXPR)
2383 tree *ref = &TREE_OPERAND (base, 0);
2384 while (handled_component_p (*ref))
2385 ref = &TREE_OPERAND (*ref, 0);
2386 if (TREE_CODE (*ref) == MEM_REF)
2388 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2389 TREE_OPERAND (*ref, 0),
2390 TREE_OPERAND (*ref, 1));
2391 if (tem)
2392 *ref = tem;
2397 civ = alloc_iv (data, base, step);
2398 /* Fail if base object of this memory reference is unknown. */
2399 if (civ->base_object == NULL_TREE)
2400 goto fail;
2402 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2403 return;
2405 fail:
2406 for_each_index (op_p, idx_record_use, data);
2409 /* Finds and records invariants used in STMT. */
2411 static void
2412 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2414 ssa_op_iter iter;
2415 use_operand_p use_p;
2416 tree op;
2418 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2420 op = USE_FROM_PTR (use_p);
2421 record_invariant (data, op, false);
2425 /* CALL calls an internal function. If operand *OP_P will become an
2426 address when the call is expanded, return the type of the memory
2427 being addressed, otherwise return null. */
2429 static tree
2430 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2432 switch (gimple_call_internal_fn (call))
2434 case IFN_MASK_LOAD:
2435 case IFN_MASK_LOAD_LANES:
2436 if (op_p == gimple_call_arg_ptr (call, 0))
2437 return TREE_TYPE (gimple_call_lhs (call));
2438 return NULL_TREE;
2440 case IFN_MASK_STORE:
2441 case IFN_MASK_STORE_LANES:
2442 if (op_p == gimple_call_arg_ptr (call, 0))
2443 return TREE_TYPE (gimple_call_arg (call, 3));
2444 return NULL_TREE;
2446 default:
2447 return NULL_TREE;
2451 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2452 Return true if the operand will become an address when STMT
2453 is expanded and record the associated address use if so. */
2455 static bool
2456 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2457 struct iv *iv)
2459 /* Fail if base object of this memory reference is unknown. */
2460 if (iv->base_object == NULL_TREE)
2461 return false;
2463 tree mem_type = NULL_TREE;
2464 if (gcall *call = dyn_cast <gcall *> (stmt))
2465 if (gimple_call_internal_p (call))
2466 mem_type = get_mem_type_for_internal_fn (call, op_p);
2467 if (mem_type)
2469 iv = alloc_iv (data, iv->base, iv->step);
2470 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2471 return true;
2473 return false;
2476 /* Finds interesting uses of induction variables in the statement STMT. */
2478 static void
2479 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2481 struct iv *iv;
2482 tree op, *lhs, *rhs;
2483 ssa_op_iter iter;
2484 use_operand_p use_p;
2485 enum tree_code code;
2487 find_invariants_stmt (data, stmt);
2489 if (gimple_code (stmt) == GIMPLE_COND)
2491 find_interesting_uses_cond (data, stmt);
2492 return;
2495 if (is_gimple_assign (stmt))
2497 lhs = gimple_assign_lhs_ptr (stmt);
2498 rhs = gimple_assign_rhs1_ptr (stmt);
2500 if (TREE_CODE (*lhs) == SSA_NAME)
2502 /* If the statement defines an induction variable, the uses are not
2503 interesting by themselves. */
2505 iv = get_iv (data, *lhs);
2507 if (iv && !integer_zerop (iv->step))
2508 return;
2511 code = gimple_assign_rhs_code (stmt);
2512 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2513 && (REFERENCE_CLASS_P (*rhs)
2514 || is_gimple_val (*rhs)))
2516 if (REFERENCE_CLASS_P (*rhs))
2517 find_interesting_uses_address (data, stmt, rhs);
2518 else
2519 find_interesting_uses_op (data, *rhs);
2521 if (REFERENCE_CLASS_P (*lhs))
2522 find_interesting_uses_address (data, stmt, lhs);
2523 return;
2525 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2527 find_interesting_uses_cond (data, stmt);
2528 return;
2531 /* TODO -- we should also handle address uses of type
2533 memory = call (whatever);
2537 call (memory). */
2540 if (gimple_code (stmt) == GIMPLE_PHI
2541 && gimple_bb (stmt) == data->current_loop->header)
2543 iv = get_iv (data, PHI_RESULT (stmt));
2545 if (iv && !integer_zerop (iv->step))
2546 return;
2549 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2551 op = USE_FROM_PTR (use_p);
2553 if (TREE_CODE (op) != SSA_NAME)
2554 continue;
2556 iv = get_iv (data, op);
2557 if (!iv)
2558 continue;
2560 if (!find_address_like_use (data, stmt, use_p->use, iv))
2561 find_interesting_uses_op (data, op);
2565 /* Finds interesting uses of induction variables outside of loops
2566 on loop exit edge EXIT. */
2568 static void
2569 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2571 gphi *phi;
2572 gphi_iterator psi;
2573 tree def;
2575 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2577 phi = psi.phi ();
2578 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2579 if (!virtual_operand_p (def))
2580 find_interesting_uses_op (data, def);
2584 /* Return TRUE if OFFSET is within the range of the [base + offset] addressing
2585 mode for the memory reference represented by USE. */
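/* The answer is target-dependent: for instance, a target whose [base + offset]
   form only encodes 16-bit signed displacements would reject an offset of
   0x12345 here while accepting an offset of 8. */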
2587 static GTY (()) vec<rtx, va_gc> *addr_list;
2589 static bool
2590 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2592 rtx reg, addr;
2593 unsigned list_index;
2594 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2595 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2597 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2598 if (list_index >= vec_safe_length (addr_list))
2599 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2601 addr = (*addr_list)[list_index];
2602 if (!addr)
2604 addr_mode = targetm.addr_space.address_mode (as);
2605 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2606 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2607 (*addr_list)[list_index] = addr;
2609 else
2610 addr_mode = GET_MODE (addr);
2612 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2613 return (memory_address_addr_space_p (mem_mode, addr, as));
2616 /* Comparison function to sort a group's uses in ascending order of addr_offset. */
2618 static int
2619 group_compare_offset (const void *a, const void *b)
2621 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2622 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2624 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2627 /* Check if small groups should be split. Return true if no group
2628 contains more than two uses with distinct addr_offsets. Return
2629 false otherwise. We want to split such groups because:
2631 1) Small groups don't have much benefit and may interfere with
2632 general candidate selection.
2633 2) The problem size with only small groups is usually small, and the
2634 general algorithm can handle it well.
2636 TODO -- The above claim may not hold when we want to merge memory
2637 accesses with consecutive addresses. */
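/* For instance, a group whose uses have offsets {0, 4, 4} counts only two
   distinct offsets; if every group looks like that, true is returned and the
   caller splits them. A single group with offsets {0, 4, 8} is enough to
   return false. */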
2639 static bool
2640 split_small_address_groups_p (struct ivopts_data *data)
2642 unsigned int i, j, distinct = 1;
2643 struct iv_use *pre;
2644 struct iv_group *group;
2646 for (i = 0; i < data->vgroups.length (); i++)
2648 group = data->vgroups[i];
2649 if (group->vuses.length () == 1)
2650 continue;
2652 gcc_assert (address_p (group->type));
2653 if (group->vuses.length () == 2)
2655 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2656 group->vuses[1]->addr_offset) > 0)
2657 std::swap (group->vuses[0], group->vuses[1]);
2659 else
2660 group->vuses.qsort (group_compare_offset);
2662 if (distinct > 2)
2663 continue;
2665 distinct = 1;
2666 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2668 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2670 pre = group->vuses[j];
2671 distinct++;
2674 if (distinct > 2)
2675 break;
2679 return (distinct <= 2);
2682 /* For each group of address type uses, this function further groups
2683 these uses according to the maximum offset supported by the target's
2684 [base + offset] addressing mode. */
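/* For instance, if a group has uses at offsets 0, 8 and 1000000 from the
   same base and the target's [base + offset] form cannot encode the last
   displacement, that use is moved into a new group while the first two stay
   together (unless small groups are being split unconditionally). */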
2686 static void
2687 split_address_groups (struct ivopts_data *data)
2689 unsigned int i, j;
2690 /* Whether to split groups at every distinct offset. */
2691 bool split_p = split_small_address_groups_p (data);
2693 for (i = 0; i < data->vgroups.length (); i++)
2695 struct iv_group *new_group = NULL;
2696 struct iv_group *group = data->vgroups[i];
2697 struct iv_use *use = group->vuses[0];
2699 use->id = 0;
2700 use->group_id = group->id;
2701 if (group->vuses.length () == 1)
2702 continue;
2704 gcc_assert (address_p (use->type));
2706 for (j = 1; j < group->vuses.length ();)
2708 struct iv_use *next = group->vuses[j];
2709 poly_int64 offset = next->addr_offset - use->addr_offset;
2711 /* Split the group if asked to, or if the offset against the first
2712 use can't fit in the offset part of the addressing mode. IV uses
2713 having the same offset are still kept in one group. */
2714 if (maybe_ne (offset, 0)
2715 && (split_p || !addr_offset_valid_p (use, offset)))
2717 if (!new_group)
2718 new_group = record_group (data, group->type);
2719 group->vuses.ordered_remove (j);
2720 new_group->vuses.safe_push (next);
2721 continue;
2724 next->id = j;
2725 next->group_id = group->id;
2726 j++;
2731 /* Finds uses of the induction variables that are interesting. */
2733 static void
2734 find_interesting_uses (struct ivopts_data *data)
2736 basic_block bb;
2737 gimple_stmt_iterator bsi;
2738 basic_block *body = get_loop_body (data->current_loop);
2739 unsigned i;
2740 edge e;
2742 for (i = 0; i < data->current_loop->num_nodes; i++)
2744 edge_iterator ei;
2745 bb = body[i];
2747 FOR_EACH_EDGE (e, ei, bb->succs)
2748 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2749 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2750 find_interesting_uses_outside (data, e);
2752 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2753 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2754 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2755 if (!is_gimple_debug (gsi_stmt (bsi)))
2756 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2758 free (body);
2760 split_address_groups (data);
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2764 fprintf (dump_file, "\n<IV Groups>:\n");
2765 dump_groups (dump_file, data);
2766 fprintf (dump_file, "\n");
2770 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2771 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2772 we are at the top-level of the processed address. */
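/* For instance, stripping &a[i + 3] with 4-byte elements yields &a[i] and
   sets *OFFSET to 12; stripping p + 16 yields p with *OFFSET set to 16. */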
2774 static tree
2775 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2776 poly_int64 *offset)
2778 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2779 enum tree_code code;
2780 tree type, orig_type = TREE_TYPE (expr);
2781 poly_int64 off0, off1;
2782 HOST_WIDE_INT st;
2783 tree orig_expr = expr;
2785 STRIP_NOPS (expr);
2787 type = TREE_TYPE (expr);
2788 code = TREE_CODE (expr);
2789 *offset = 0;
2791 switch (code)
2793 case POINTER_PLUS_EXPR:
2794 case PLUS_EXPR:
2795 case MINUS_EXPR:
2796 op0 = TREE_OPERAND (expr, 0);
2797 op1 = TREE_OPERAND (expr, 1);
2799 op0 = strip_offset_1 (op0, false, false, &off0);
2800 op1 = strip_offset_1 (op1, false, false, &off1);
2802 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2803 if (op0 == TREE_OPERAND (expr, 0)
2804 && op1 == TREE_OPERAND (expr, 1))
2805 return orig_expr;
2807 if (integer_zerop (op1))
2808 expr = op0;
2809 else if (integer_zerop (op0))
2811 if (code == MINUS_EXPR)
2812 expr = fold_build1 (NEGATE_EXPR, type, op1);
2813 else
2814 expr = op1;
2816 else
2817 expr = fold_build2 (code, type, op0, op1);
2819 return fold_convert (orig_type, expr);
2821 case MULT_EXPR:
2822 op1 = TREE_OPERAND (expr, 1);
2823 if (!cst_and_fits_in_hwi (op1))
2824 return orig_expr;
2826 op0 = TREE_OPERAND (expr, 0);
2827 op0 = strip_offset_1 (op0, false, false, &off0);
2828 if (op0 == TREE_OPERAND (expr, 0))
2829 return orig_expr;
2831 *offset = off0 * int_cst_value (op1);
2832 if (integer_zerop (op0))
2833 expr = op0;
2834 else
2835 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2837 return fold_convert (orig_type, expr);
2839 case ARRAY_REF:
2840 case ARRAY_RANGE_REF:
2841 if (!inside_addr)
2842 return orig_expr;
2844 step = array_ref_element_size (expr);
2845 if (!cst_and_fits_in_hwi (step))
2846 break;
2848 st = int_cst_value (step);
2849 op1 = TREE_OPERAND (expr, 1);
2850 op1 = strip_offset_1 (op1, false, false, &off1);
2851 *offset = off1 * st;
2853 if (top_compref
2854 && integer_zerop (op1))
2856 /* Strip the component reference completely. */
2857 op0 = TREE_OPERAND (expr, 0);
2858 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2859 *offset += off0;
2860 return op0;
2862 break;
2864 case COMPONENT_REF:
2866 tree field;
2868 if (!inside_addr)
2869 return orig_expr;
2871 tmp = component_ref_field_offset (expr);
2872 field = TREE_OPERAND (expr, 1);
2873 if (top_compref
2874 && cst_and_fits_in_hwi (tmp)
2875 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2877 HOST_WIDE_INT boffset, abs_off;
2879 /* Strip the component reference completely. */
2880 op0 = TREE_OPERAND (expr, 0);
2881 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2882 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2883 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2884 if (boffset < 0)
2885 abs_off = -abs_off;
2887 *offset = off0 + int_cst_value (tmp) + abs_off;
2888 return op0;
2891 break;
2893 case ADDR_EXPR:
2894 op0 = TREE_OPERAND (expr, 0);
2895 op0 = strip_offset_1 (op0, true, true, &off0);
2896 *offset += off0;
2898 if (op0 == TREE_OPERAND (expr, 0))
2899 return orig_expr;
2901 expr = build_fold_addr_expr (op0);
2902 return fold_convert (orig_type, expr);
2904 case MEM_REF:
2905 /* ??? Offset operand? */
2906 inside_addr = false;
2907 break;
2909 default:
2910 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2911 return build_int_cst (orig_type, 0);
2912 return orig_expr;
2915 /* Default handling of expressions for which we want to recurse into
2916 the first operand. */
2917 op0 = TREE_OPERAND (expr, 0);
2918 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2919 *offset += off0;
2921 if (op0 == TREE_OPERAND (expr, 0)
2922 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2923 return orig_expr;
2925 expr = copy_node (expr);
2926 TREE_OPERAND (expr, 0) = op0;
2927 if (op1)
2928 TREE_OPERAND (expr, 1) = op1;
2930 /* Inside an address, we might strip the top-level component references,
2931 thus changing the type of the expression. Handling of ADDR_EXPR
2932 will fix that. */
2933 expr = fold_convert (orig_type, expr);
2935 return expr;
2938 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2940 tree
2941 strip_offset (tree expr, poly_uint64_pod *offset)
2943 poly_int64 off;
2944 tree core = strip_offset_1 (expr, false, false, &off);
2945 *offset = off;
2946 return core;
2949 /* Returns a variant of TYPE that can be used as a base for different uses.
2950 We return an unsigned type with the same precision, which avoids problems
2951 with overflows. */
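/* For instance, int maps to unsigned int, and a pointer type maps to an
   unsigned integer type of the same precision. */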
2953 static tree
2954 generic_type_for (tree type)
2956 if (POINTER_TYPE_P (type))
2957 return unsigned_type_for (type);
2959 if (TYPE_UNSIGNED (type))
2960 return type;
2962 return unsigned_type_for (type);
2965 /* Private data for walk_tree. */
2967 struct walk_tree_data
2969 bitmap *inv_vars;
2970 struct ivopts_data *idata;
2973 /* Callback function for walk_tree; it records invariants and symbol
2974 references in *EXPR_P. DATA is the structure storing result info. */
2976 static tree
2977 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2979 tree op = *expr_p;
2980 struct version_info *info;
2981 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2983 if (TREE_CODE (op) != SSA_NAME)
2984 return NULL_TREE;
2986 info = name_info (wdata->idata, op);
2987 /* Because we expand simple operations when finding IVs, a loop-invariant
2988 variable that isn't referred to by the original loop could be used now.
2989 Record such invariant variables here. */
2990 if (!info->iv)
2992 struct ivopts_data *idata = wdata->idata;
2993 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2995 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2997 tree steptype = TREE_TYPE (op);
2998 if (POINTER_TYPE_P (steptype))
2999 steptype = sizetype;
3000 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3001 record_invariant (idata, op, false);
3004 if (!info->inv_id || info->has_nonlin_use)
3005 return NULL_TREE;
3007 if (!*wdata->inv_vars)
3008 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3009 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3011 return NULL_TREE;
3014 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3015 store them. */
3017 static inline void
3018 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3020 struct walk_tree_data wdata;
3022 if (!inv_vars)
3023 return;
3025 wdata.idata = data;
3026 wdata.inv_vars = inv_vars;
3027 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3030 /* Get the entry from the invariant expr hash table for INV_EXPR. A new entry
3031 will be recorded if it doesn't exist yet. Given the two exprs below:
3032 inv_expr + cst1, inv_expr + cst2
3033 it's hard to decide whether the constant part should be stripped
3034 or not. We choose not to strip it, based on the facts below:
3035 1) We need to count the ADD cost for the constant part if it's stripped,
3036 which isn't always trivial where this function is called.
3037 2) Stripping the constant away may conflict with the following loop
3038 invariant hoisting pass.
3039 3) Not stripping the constant away results in more invariant exprs,
3040 which usually leads to decisions preferring lower register pressure. */
3042 static iv_inv_expr_ent *
3043 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3045 STRIP_NOPS (inv_expr);
3047 if (poly_int_tree_p (inv_expr)
3048 || TREE_CODE (inv_expr) == SSA_NAME)
3049 return NULL;
3051 /* Don't strip constant part away as we used to. */
3053 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3054 struct iv_inv_expr_ent ent;
3055 ent.expr = inv_expr;
3056 ent.hash = iterative_hash_expr (inv_expr, 0);
3057 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3059 if (!*slot)
3061 *slot = XNEW (struct iv_inv_expr_ent);
3062 (*slot)->expr = inv_expr;
3063 (*slot)->hash = ent.hash;
3064 (*slot)->id = ++data->max_inv_expr_id;
3067 return *slot;
3070 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3071 position to POS. If USE is not NULL, the candidate is set as related to
3072 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3073 replacement of the final value of the iv by a direct computation. */
3075 static struct iv_cand *
3076 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3077 enum iv_position pos, struct iv_use *use,
3078 gimple *incremented_at, struct iv *orig_iv = NULL,
3079 bool doloop = false)
3081 unsigned i;
3082 struct iv_cand *cand = NULL;
3083 tree type, orig_type;
3085 gcc_assert (base && step);
3087 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3088 live, but the ivopts code may replace a real pointer with one
3089 pointing before or after the memory block that is then adjusted
3090 into the memory block during the loop. FIXME: It would likely be
3091 better to actually force the pointer live and still use ivopts;
3092 for example, it would be enough to write the pointer into memory
3093 and keep it there until after the loop. */
3094 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3095 return NULL;
3097 /* For non-original variables, make sure their values are computed in a type
3098 that does not invoke undefined behavior on overflows (since in general,
3099 we cannot prove that these induction variables are non-wrapping). */
3100 if (pos != IP_ORIGINAL)
3102 orig_type = TREE_TYPE (base);
3103 type = generic_type_for (orig_type);
3104 if (type != orig_type)
3106 base = fold_convert (type, base);
3107 step = fold_convert (type, step);
3111 for (i = 0; i < data->vcands.length (); i++)
3113 cand = data->vcands[i];
3115 if (cand->pos != pos)
3116 continue;
3118 if (cand->incremented_at != incremented_at
3119 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3120 && cand->ainc_use != use))
3121 continue;
3123 if (operand_equal_p (base, cand->iv->base, 0)
3124 && operand_equal_p (step, cand->iv->step, 0)
3125 && (TYPE_PRECISION (TREE_TYPE (base))
3126 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3127 break;
3130 if (i == data->vcands.length ())
3132 cand = XCNEW (struct iv_cand);
3133 cand->id = i;
3134 cand->iv = alloc_iv (data, base, step);
3135 cand->pos = pos;
3136 if (pos != IP_ORIGINAL)
3138 if (doloop)
3139 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3140 else
3141 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3142 cand->var_after = cand->var_before;
3144 cand->important = important;
3145 cand->incremented_at = incremented_at;
3146 cand->doloop_p = doloop;
3147 data->vcands.safe_push (cand);
3149 if (!poly_int_tree_p (step))
3151 find_inv_vars (data, &step, &cand->inv_vars);
3153 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3154 /* Share bitmap between inv_vars and inv_exprs for cand. */
3155 if (inv_expr != NULL)
3157 cand->inv_exprs = cand->inv_vars;
3158 cand->inv_vars = NULL;
3159 if (cand->inv_exprs)
3160 bitmap_clear (cand->inv_exprs);
3161 else
3162 cand->inv_exprs = BITMAP_ALLOC (NULL);
3164 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3169 cand->ainc_use = use;
3170 else
3171 cand->ainc_use = NULL;
3173 cand->orig_iv = orig_iv;
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 dump_cand (dump_file, cand);
3178 cand->important |= important;
3179 cand->doloop_p |= doloop;
3181 /* Relate candidate to the group for which it is added. */
3182 if (use)
3183 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3185 return cand;
3188 /* Returns true if incrementing the induction variable at the end of the LOOP
3189 is allowed.
3191 The purpose is to avoid splitting the latch edge with a biv increment, thus
3192 creating a jump, possibly confusing other optimization passes and leaving
3193 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3194 available (so we do not have a better alternative), or if the latch edge
3195 is already nonempty. */
3197 static bool
3198 allow_ip_end_pos_p (class loop *loop)
3200 if (!ip_normal_pos (loop))
3201 return true;
3203 if (!empty_block_p (ip_end_pos (loop)))
3204 return true;
3206 return false;
3209 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3210 Important field is set to IMPORTANT. */
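/* For instance, when STEP equals the access size and the target supports
   post-increment addressing, BASE itself is added as a candidate incremented
   just after the use (IP_AFTER_USE); for pre-increment, BASE - STEP
   incremented just before the use (IP_BEFORE_USE) is added instead. */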
3212 static void
3213 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3214 bool important, struct iv_use *use)
3216 basic_block use_bb = gimple_bb (use->stmt);
3217 machine_mode mem_mode;
3218 unsigned HOST_WIDE_INT cstepi;
3220 /* If we insert the increment in any position other than the standard
3221 ones, we must ensure that it is incremented once per iteration.
3222 It must not be in an inner nested loop, or one side of an if
3223 statement. */
3224 if (use_bb->loop_father != data->current_loop
3225 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3226 || stmt_can_throw_internal (cfun, use->stmt)
3227 || !cst_and_fits_in_hwi (step))
3228 return;
3230 cstepi = int_cst_value (step);
3232 mem_mode = TYPE_MODE (use->mem_type);
3233 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3234 || USE_STORE_PRE_INCREMENT (mem_mode))
3235 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3236 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3237 || USE_STORE_PRE_DECREMENT (mem_mode))
3238 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3240 enum tree_code code = MINUS_EXPR;
3241 tree new_base;
3242 tree new_step = step;
3244 if (POINTER_TYPE_P (TREE_TYPE (base)))
3246 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3247 code = POINTER_PLUS_EXPR;
3249 else
3250 new_step = fold_convert (TREE_TYPE (base), new_step);
3251 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3252 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3253 use->stmt);
3255 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3256 || USE_STORE_POST_INCREMENT (mem_mode))
3257 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3258 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3259 || USE_STORE_POST_DECREMENT (mem_mode))
3260 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3262 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3263 use->stmt);
3267 /* Adds a candidate BASE + STEP * i. The important field is set to
3268 IMPORTANT. If USE is not NULL, the candidate is set as related to
3269 it. The candidate computation is scheduled before the exit condition
3270 and at the end of the loop. */
3272 static void
3273 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3274 struct iv_use *use, struct iv *orig_iv = NULL,
3275 bool doloop = false)
3277 if (ip_normal_pos (data->current_loop))
3278 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3279 doloop);
3280 /* Exclude the doloop candidate here since it requires decrement then comparison
3281 and jump, which the IP_END position doesn't match. */
3282 if (!doloop && ip_end_pos (data->current_loop)
3283 && allow_ip_end_pos_p (data->current_loop))
3284 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3287 /* Adds standard iv candidates. */
3289 static void
3290 add_standard_iv_candidates (struct ivopts_data *data)
3292 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3294 /* The same for a double-integer type if it is still fast enough. */
3295 if (TYPE_PRECISION
3296 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3297 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3298 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3299 build_int_cst (long_integer_type_node, 1), true, NULL);
3301 /* The same for a double-integer type if it is still fast enough. */
3302 if (TYPE_PRECISION
3303 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3304 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3305 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3306 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3310 /* Adds candidates based on the old induction variable IV. */
3312 static void
3313 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3315 gimple *phi;
3316 tree def;
3317 struct iv_cand *cand;
3319 /* Check if this biv is used in an address type use. */
3320 if (iv->no_overflow && iv->have_address_use
3321 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3322 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3324 tree base = fold_convert (sizetype, iv->base);
3325 tree step = fold_convert (sizetype, iv->step);
3327 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3328 add_candidate (data, base, step, true, NULL, iv);
3329 /* Add iv cand of the original type only if it has nonlinear use. */
3330 if (iv->nonlin_use)
3331 add_candidate (data, iv->base, iv->step, true, NULL);
3333 else
3334 add_candidate (data, iv->base, iv->step, true, NULL);
3336 /* The same, but with initial value zero. */
3337 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3338 add_candidate (data, size_int (0), iv->step, true, NULL);
3339 else
3340 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3341 iv->step, true, NULL);
3343 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3344 if (gimple_code (phi) == GIMPLE_PHI)
3346 /* Additionally record the possibility of leaving the original iv
3347 untouched. */
3348 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3349 /* Don't add candidate if it's from another PHI node because
3350 it's an affine iv appearing in the form of PEELED_CHREC. */
3351 phi = SSA_NAME_DEF_STMT (def);
3352 if (gimple_code (phi) != GIMPLE_PHI)
3354 cand = add_candidate_1 (data,
3355 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3356 SSA_NAME_DEF_STMT (def));
3357 if (cand)
3359 cand->var_before = iv->ssa_name;
3360 cand->var_after = def;
3363 else
3364 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3368 /* Adds candidates based on the old induction variables. */
3370 static void
3371 add_iv_candidate_for_bivs (struct ivopts_data *data)
3373 unsigned i;
3374 struct iv *iv;
3375 bitmap_iterator bi;
3377 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3379 iv = ver_info (data, i)->iv;
3380 if (iv && iv->biv_p && !integer_zerop (iv->step))
3381 add_iv_candidate_for_biv (data, iv);
3385 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3387 static void
3388 record_common_cand (struct ivopts_data *data, tree base,
3389 tree step, struct iv_use *use)
3391 class iv_common_cand ent;
3392 class iv_common_cand **slot;
3394 ent.base = base;
3395 ent.step = step;
3396 ent.hash = iterative_hash_expr (base, 0);
3397 ent.hash = iterative_hash_expr (step, ent.hash);
3399 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3400 if (*slot == NULL)
3402 *slot = new iv_common_cand ();
3403 (*slot)->base = base;
3404 (*slot)->step = step;
3405 (*slot)->uses.create (8);
3406 (*slot)->hash = ent.hash;
3407 data->iv_common_cands.safe_push ((*slot));
3410 gcc_assert (use != NULL);
3411 (*slot)->uses.safe_push (use);
3412 return;
3415 /* Comparison function used to sort common candidates. */
3417 static int
3418 common_cand_cmp (const void *p1, const void *p2)
3420 unsigned n1, n2;
3421 const class iv_common_cand *const *const ccand1
3422 = (const class iv_common_cand *const *)p1;
3423 const class iv_common_cand *const *const ccand2
3424 = (const class iv_common_cand *const *)p2;
3426 n1 = (*ccand1)->uses.length ();
3427 n2 = (*ccand2)->uses.length ();
3428 return n2 - n1;
3431 /* Adds IV candidates based on the common candidates recorded. */
3433 static void
3434 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3436 unsigned i, j;
3437 struct iv_cand *cand_1, *cand_2;
3439 data->iv_common_cands.qsort (common_cand_cmp);
3440 for (i = 0; i < data->iv_common_cands.length (); i++)
3442 class iv_common_cand *ptr = data->iv_common_cands[i];
3444 /* Only add IV candidate if it's derived from multiple uses. */
3445 if (ptr->uses.length () <= 1)
3446 break;
3448 cand_1 = NULL;
3449 cand_2 = NULL;
3450 if (ip_normal_pos (data->current_loop))
3451 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3452 false, IP_NORMAL, NULL, NULL);
3454 if (ip_end_pos (data->current_loop)
3455 && allow_ip_end_pos_p (data->current_loop))
3456 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3457 false, IP_END, NULL, NULL);
3459 /* Bind deriving uses and the new candidates. */
3460 for (j = 0; j < ptr->uses.length (); j++)
3462 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3463 if (cand_1)
3464 bitmap_set_bit (group->related_cands, cand_1->id);
3465 if (cand_2)
3466 bitmap_set_bit (group->related_cands, cand_2->id);
3470 /* Release data since it is useless from this point. */
3471 data->iv_common_cand_tab->empty ();
3472 data->iv_common_cands.truncate (0);
3475 /* Adds candidates based on the value of USE's iv. */
3477 static void
3478 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3480 poly_uint64 offset;
3481 tree base;
3482 tree basetype;
3483 struct iv *iv = use->iv;
3485 add_candidate (data, iv->base, iv->step, false, use);
3487 /* Record common candidate for use in case it can be shared by others. */
3488 record_common_cand (data, iv->base, iv->step, use);
3490 /* Record common candidate with initial value zero. */
3491 basetype = TREE_TYPE (iv->base);
3492 if (POINTER_TYPE_P (basetype))
3493 basetype = sizetype;
3494 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3496 /* Compare the cost of an address with an unscaled index with the cost of
3497 an address with a scaled index and add candidate if useful. */
3498 poly_int64 step;
3499 if (use != NULL
3500 && poly_int_tree_p (iv->step, &step)
3501 && address_p (use->type))
3503 poly_int64 new_step;
3504 unsigned int fact = preferred_mem_scale_factor
3505 (use->iv->base,
3506 TYPE_MODE (use->mem_type),
3507 optimize_loop_for_speed_p (data->current_loop));
3509 if (fact != 1
3510 && multiple_p (step, fact, &new_step))
3511 add_candidate (data, size_int (0),
3512 wide_int_to_tree (sizetype, new_step),
3513 true, NULL);
3516 /* Record common candidate with constant offset stripped in base.
3517 Like the use itself, we also add candidate directly for it. */
3518 base = strip_offset (iv->base, &offset);
3519 if (maybe_ne (offset, 0U) || base != iv->base)
3521 record_common_cand (data, base, iv->step, use);
3522 add_candidate (data, base, iv->step, false, use);
3525 /* Record common candidate with base_object removed in base. */
3526 base = iv->base;
3527 STRIP_NOPS (base);
3528 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3530 tree step = iv->step;
3532 STRIP_NOPS (step);
3533 base = TREE_OPERAND (base, 1);
3534 step = fold_convert (sizetype, step);
3535 record_common_cand (data, base, step, use);
3536 /* Also record common candidate with offset stripped. */
3537 base = strip_offset (base, &offset);
3538 if (maybe_ne (offset, 0U))
3539 record_common_cand (data, base, step, use);
3542 /* At last, add auto-increment candidates. Make such variables
3543 important since other iv uses with the same base object may be based
3544 on them. */
3545 if (use != NULL && address_p (use->type))
3546 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3549 /* Adds candidates based on the uses. */
3551 static void
3552 add_iv_candidate_for_groups (struct ivopts_data *data)
3554 unsigned i;
3556 /* Only add a candidate for the first use in each group. */
3557 for (i = 0; i < data->vgroups.length (); i++)
3559 struct iv_group *group = data->vgroups[i];
3561 gcc_assert (group->vuses[0] != NULL);
3562 add_iv_candidate_for_use (data, group->vuses[0]);
3564 add_iv_candidate_derived_from_uses (data);
3567 /* Record important candidates and add them to related_cands bitmaps. */
3569 static void
3570 record_important_candidates (struct ivopts_data *data)
3572 unsigned i;
3573 struct iv_group *group;
3575 for (i = 0; i < data->vcands.length (); i++)
3577 struct iv_cand *cand = data->vcands[i];
3579 if (cand->important)
3580 bitmap_set_bit (data->important_candidates, i);
3583 data->consider_all_candidates = (data->vcands.length ()
3584 <= CONSIDER_ALL_CANDIDATES_BOUND);
3586 /* Add important candidates to groups' related_cands bitmaps. */
3587 for (i = 0; i < data->vgroups.length (); i++)
3589 group = data->vgroups[i];
3590 bitmap_ior_into (group->related_cands, data->important_candidates);
3594 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3595 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3596 we allocate a simple list to every use. */
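/* For instance, a group related to 5 candidates gets a cost map of 8 entries,
   the next power of two, so that probing in set_group_iv_cost can use a cheap
   bit mask instead of a division. */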
3598 static void
3599 alloc_use_cost_map (struct ivopts_data *data)
3601 unsigned i, size, s;
3603 for (i = 0; i < data->vgroups.length (); i++)
3605 struct iv_group *group = data->vgroups[i];
3607 if (data->consider_all_candidates)
3608 size = data->vcands.length ();
3609 else
3611 s = bitmap_count_bits (group->related_cands);
3613 /* Round up to a power of two, so that taking the modulus is fast. */
3614 size = s ? (1 << ceil_log2 (s)) : 1;
3617 group->n_map_members = size;
3618 group->cost_map = XCNEWVEC (class cost_pair, size);
3622 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3623 on invariants INV_VARS and that the value used in expressing it is
3624 VALUE, and in case of iv elimination the comparison operator is COMP. */
3626 static void
3627 set_group_iv_cost (struct ivopts_data *data,
3628 struct iv_group *group, struct iv_cand *cand,
3629 comp_cost cost, bitmap inv_vars, tree value,
3630 enum tree_code comp, bitmap inv_exprs)
3632 unsigned i, s;
3634 if (cost.infinite_cost_p ())
3636 BITMAP_FREE (inv_vars);
3637 BITMAP_FREE (inv_exprs);
3638 return;
3641 if (data->consider_all_candidates)
3643 group->cost_map[cand->id].cand = cand;
3644 group->cost_map[cand->id].cost = cost;
3645 group->cost_map[cand->id].inv_vars = inv_vars;
3646 group->cost_map[cand->id].inv_exprs = inv_exprs;
3647 group->cost_map[cand->id].value = value;
3648 group->cost_map[cand->id].comp = comp;
3649 return;
3652 /* n_map_members is a power of two, so this computes modulo. */
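/* For instance, with 8 map members a candidate id of 11 starts probing at
   slot 3; the loops below scan forward and wrap around to the start. */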
3653 s = cand->id & (group->n_map_members - 1);
3654 for (i = s; i < group->n_map_members; i++)
3655 if (!group->cost_map[i].cand)
3656 goto found;
3657 for (i = 0; i < s; i++)
3658 if (!group->cost_map[i].cand)
3659 goto found;
3661 gcc_unreachable ();
3663 found:
3664 group->cost_map[i].cand = cand;
3665 group->cost_map[i].cost = cost;
3666 group->cost_map[i].inv_vars = inv_vars;
3667 group->cost_map[i].inv_exprs = inv_exprs;
3668 group->cost_map[i].value = value;
3669 group->cost_map[i].comp = comp;
3672 /* Gets cost of (GROUP, CAND) pair. */
3674 static class cost_pair *
3675 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3676 struct iv_cand *cand)
3678 unsigned i, s;
3679 class cost_pair *ret;
3681 if (!cand)
3682 return NULL;
3684 if (data->consider_all_candidates)
3686 ret = group->cost_map + cand->id;
3687 if (!ret->cand)
3688 return NULL;
3690 return ret;
3693 /* n_map_members is a power of two, so this computes modulo. */
3694 s = cand->id & (group->n_map_members - 1);
3695 for (i = s; i < group->n_map_members; i++)
3696 if (group->cost_map[i].cand == cand)
3697 return group->cost_map + i;
3698 else if (group->cost_map[i].cand == NULL)
3699 return NULL;
3700 for (i = 0; i < s; i++)
3701 if (group->cost_map[i].cand == cand)
3702 return group->cost_map + i;
3703 else if (group->cost_map[i].cand == NULL)
3704 return NULL;
3706 return NULL;
3709 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3710 static rtx
3711 produce_memory_decl_rtl (tree obj, int *regno)
3713 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3714 machine_mode address_mode = targetm.addr_space.address_mode (as);
3715 rtx x;
3717 gcc_assert (obj);
3718 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3720 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3721 x = gen_rtx_SYMBOL_REF (address_mode, name);
3722 SET_SYMBOL_REF_DECL (x, obj);
3723 x = gen_rtx_MEM (DECL_MODE (obj), x);
3724 set_mem_addr_space (x, as);
3725 targetm.encode_section_info (obj, x, true);
3727 else
3729 x = gen_raw_REG (address_mode, (*regno)++);
3730 x = gen_rtx_MEM (DECL_MODE (obj), x);
3731 set_mem_addr_space (x, as);
3734 return x;
3737 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3738 walk_tree. DATA contains the actual fake register number. */
3740 static tree
3741 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3743 tree obj = NULL_TREE;
3744 rtx x = NULL_RTX;
3745 int *regno = (int *) data;
3747 switch (TREE_CODE (*expr_p))
3749 case ADDR_EXPR:
3750 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3751 handled_component_p (*expr_p);
3752 expr_p = &TREE_OPERAND (*expr_p, 0))
3753 continue;
3754 obj = *expr_p;
3755 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3756 x = produce_memory_decl_rtl (obj, regno);
3757 break;
3759 case SSA_NAME:
3760 *ws = 0;
3761 obj = SSA_NAME_VAR (*expr_p);
3762 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3763 if (!obj)
3764 return NULL_TREE;
3765 if (!DECL_RTL_SET_P (obj))
3766 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3767 break;
3769 case VAR_DECL:
3770 case PARM_DECL:
3771 case RESULT_DECL:
3772 *ws = 0;
3773 obj = *expr_p;
3775 if (DECL_RTL_SET_P (obj))
3776 break;
3778 if (DECL_MODE (obj) == BLKmode)
3779 x = produce_memory_decl_rtl (obj, regno);
3780 else
3781 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3783 break;
3785 default:
3786 break;
3789 if (x)
3791 decl_rtl_to_reset.safe_push (obj);
3792 SET_DECL_RTL (obj, x);
3795 return NULL_TREE;
3798 /* Predict whether the given loop will be transformed in the RTL
3799 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3800 This is only for target-independent checks; see targetm.predict_doloop_p
3801 for the target-dependent ones.
3803 Note that according to some initial investigation, some checks like the
3804 costly niter check and invalid stmt scanning don't yield much gain in
3805 general cases, so keep this as simple as possible first.
3807 Some RTL-specific checks seem unable to be checked in gimple; if any new
3808 checks or easy checks _are_ missing here, please add them. */
3810 static bool
3811 generic_predict_doloop_p (struct ivopts_data *data)
3813 class loop *loop = data->current_loop;
3815 /* Call target hook for target dependent checks. */
3816 if (!targetm.predict_doloop_p (loop))
3818 if (dump_file && (dump_flags & TDF_DETAILS))
3819 fprintf (dump_file, "Predict doloop failure due to"
3820 " target specific checks.\n");
3821 return false;
3824 /* Similar to doloop_optimize, check the iteration description to know whether
3825 it's suitable or not. Keep it as simple as possible; feel free to extend it
3826 if you find any multiple-exit cases that matter. */
3827 edge exit = single_dom_exit (loop);
3828 class tree_niter_desc *niter_desc;
3829 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3832 fprintf (dump_file, "Predict doloop failure due to"
3833 " unexpected niters.\n");
3834 return false;
3837 /* Similar to doloop_optimize, check whether the iteration count is too
3838 small to be profitable. */
3839 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3840 if (est_niter == -1)
3841 est_niter = get_likely_max_loop_iterations_int (loop);
3842 if (est_niter >= 0 && est_niter < 3)
3844 if (dump_file && (dump_flags & TDF_DETAILS))
3845 fprintf (dump_file,
3846 "Predict doloop failure due to"
3847 " too few iterations (%u).\n",
3848 (unsigned int) est_niter);
3849 return false;
3852 return true;
3855 /* Determines cost of the computation of EXPR. */
3857 static unsigned
3858 computation_cost (tree expr, bool speed)
3860 rtx_insn *seq;
3861 rtx rslt;
3862 tree type = TREE_TYPE (expr);
3863 unsigned cost;
3864 /* Avoid using hard regs in ways which may be unsupported. */
3865 int regno = LAST_VIRTUAL_REGISTER + 1;
3866 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3867 enum node_frequency real_frequency = node->frequency;
3869 node->frequency = NODE_FREQUENCY_NORMAL;
3870 crtl->maybe_hot_insn_p = speed;
3871 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3872 start_sequence ();
3873 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3874 seq = get_insns ();
3875 end_sequence ();
3876 default_rtl_profile ();
3877 node->frequency = real_frequency;
3879 cost = seq_cost (seq, speed);
3880 if (MEM_P (rslt))
3881 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3882 TYPE_ADDR_SPACE (type), speed);
3883 else if (!REG_P (rslt))
3884 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3886 return cost;
3889 /* Returns variable containing the value of candidate CAND at statement AT. */
3891 static tree
3892 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3894 if (stmt_after_increment (loop, cand, stmt))
3895 return cand->var_after;
3896 else
3897 return cand->var_before;
3900 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3901 same precision that is at least as wide as the precision of TYPE, stores
3902 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3903 type of A and B. */
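/* For instance, if *A is (unsigned int) x and *B is (unsigned int) y where
   x and y are 64-bit integers, *A and *B are replaced by x and y and the
   64-bit type is returned. */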
3905 static tree
3906 determine_common_wider_type (tree *a, tree *b)
3908 tree wider_type = NULL;
3909 tree suba, subb;
3910 tree atype = TREE_TYPE (*a);
3912 if (CONVERT_EXPR_P (*a))
3914 suba = TREE_OPERAND (*a, 0);
3915 wider_type = TREE_TYPE (suba);
3916 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3917 return atype;
3919 else
3920 return atype;
3922 if (CONVERT_EXPR_P (*b))
3924 subb = TREE_OPERAND (*b, 0);
3925 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3926 return atype;
3928 else
3929 return atype;
3931 *a = suba;
3932 *b = subb;
3933 return wider_type;
3936 /* Determines the expression by which USE is expressed from induction variable
3937 CAND at statement AT in LOOP. The expression is stored in two parts in a
3938 decomposed form. The invariant part is stored in AFF_INV, while the variant
3939 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in
3940 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
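/* For instance, if USE has base 4 and step 4 while CAND has base 0 and step 2,
   the ratio is 2 and USE's value is expressed as 4 + 2 * var: the constant 4
   goes to AFF_INV and 2 * var goes to AFF_VAR. */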
3942 static bool
3943 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3944 struct iv_cand *cand, class aff_tree *aff_inv,
3945 class aff_tree *aff_var, widest_int *prat = NULL)
3947 tree ubase = use->iv->base, ustep = use->iv->step;
3948 tree cbase = cand->iv->base, cstep = cand->iv->step;
3949 tree common_type, uutype, var, cstep_common;
3950 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3951 aff_tree aff_cbase;
3952 widest_int rat;
3954 /* We must have a precision to express the values of use. */
3955 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3956 return false;
3958 var = var_at_stmt (loop, cand, at);
3959 uutype = unsigned_type_for (utype);
3961 /* If the conversion is not a noop, perform it. */
3962 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3964 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3965 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3967 tree inner_base, inner_step, inner_type;
3968 inner_base = TREE_OPERAND (cbase, 0);
3969 if (CONVERT_EXPR_P (cstep))
3970 inner_step = TREE_OPERAND (cstep, 0);
3971 else
3972 inner_step = cstep;
3974 inner_type = TREE_TYPE (inner_base);
3975 /* If the candidate is added from a biv whose type is smaller than
3976 ctype, we know both the candidate and the biv won't overflow.
3977 In this case, it's safe to skip the conversion in the candidate.
3978 As an example, (unsigned short)((unsigned long)A) equals
3979 (unsigned short)A, if A has a type no larger than short. */
3980 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3982 cbase = inner_base;
3983 cstep = inner_step;
3986 cbase = fold_convert (uutype, cbase);
3987 cstep = fold_convert (uutype, cstep);
3988 var = fold_convert (uutype, var);
3991 /* Ratio is 1 when computing the value of biv cand by itself.
3992 We can't rely on constant_multiple_of in this case because the
3993 use is created after the original biv is selected. The call
3994 could fail because of inconsistent fold behavior. See PR68021
3995 for more information. */
3996 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3998 gcc_assert (is_gimple_assign (use->stmt));
3999 gcc_assert (use->iv->ssa_name == cand->var_after);
4000 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4001 rat = 1;
4003 else if (!constant_multiple_of (ustep, cstep, &rat))
4004 return false;
4006 if (prat)
4007 *prat = rat;
4009 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4010 type, we achieve better folding by computing their difference in this
4011 wider type and casting the result to UUTYPE. We do not need to worry about
4012 overflows, as all the arithmetic will in the end be performed in UUTYPE
4013 anyway. */
4014 common_type = determine_common_wider_type (&ubase, &cbase);
4016 /* use = ubase - ratio * cbase + ratio * var. */
4017 tree_to_aff_combination (ubase, common_type, aff_inv);
4018 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4019 tree_to_aff_combination (var, uutype, aff_var);
4021 /* We need to shift the value if we are after the increment. */
4022 if (stmt_after_increment (loop, cand, at))
4024 aff_tree cstep_aff;
4026 if (common_type != uutype)
4027 cstep_common = fold_convert (common_type, cstep);
4028 else
4029 cstep_common = cstep;
4031 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4032 aff_combination_add (&aff_cbase, &cstep_aff);
4035 aff_combination_scale (&aff_cbase, -rat);
4036 aff_combination_add (aff_inv, &aff_cbase);
4037 if (common_type != uutype)
4038 aff_combination_convert (aff_inv, uutype);
4040 aff_combination_scale (aff_var, rat);
4041 return true;
4044 /* Determines the expression by which USE is expressed from induction variable
4045 CAND at statement AT in LOOP. The expression is stored in a decomposed
4046 form in AFF. Returns false if USE cannot be expressed using CAND. */
4048 static bool
4049 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4050 struct iv_cand *cand, class aff_tree *aff)
4052 aff_tree aff_var;
4054 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4055 return false;
4057 aff_combination_add (aff, &aff_var);
4058 return true;
4061 /* Return the type of USE. */
4063 static tree
4064 get_use_type (struct iv_use *use)
4066 tree base_type = TREE_TYPE (use->iv->base);
4067 tree type;
4069 if (use->type == USE_REF_ADDRESS)
4071 /* The base_type may be a void pointer. Create a pointer type based on
4072 the mem_ref instead. */
4073 type = build_pointer_type (TREE_TYPE (*use->op_p));
4074 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4075 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4077 else
4078 type = base_type;
4080 return type;
4083 /* Determines the expression by which USE is expressed from induction variable
4084 CAND at statement AT in LOOP. The computation is unshared. */
4086 static tree
4087 get_computation_at (class loop *loop, gimple *at,
4088 struct iv_use *use, struct iv_cand *cand)
4090 aff_tree aff;
4091 tree type = get_use_type (use);
4093 if (!get_computation_aff (loop, at, use, cand, &aff))
4094 return NULL_TREE;
4095 unshare_aff_combination (&aff);
4096 return fold_convert (type, aff_combination_to_tree (&aff));
4099 /* Like get_computation_at, but try harder, even if the computation
4100 is more expensive. Intended for debug stmts. */
4102 static tree
4103 get_debug_computation_at (class loop *loop, gimple *at,
4104 struct iv_use *use, struct iv_cand *cand)
4106 if (tree ret = get_computation_at (loop, at, use, cand))
4107 return ret;
4109 tree ubase = use->iv->base, ustep = use->iv->step;
4110 tree cbase = cand->iv->base, cstep = cand->iv->step;
4111 tree var;
4112 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4113 widest_int rat;
4115 /* We must have enough precision to express the values of the use. */
4116 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4117 return NULL_TREE;
4119 /* Try to handle a case that get_computation_at doesn't:
4120 try to express
4121 use = ubase + (var - cbase) / ratio. */
4122 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4123 &rat))
4124 return NULL_TREE;
4126 bool neg_p = false;
4127 if (wi::neg_p (rat))
4129 if (TYPE_UNSIGNED (ctype))
4130 return NULL_TREE;
4131 neg_p = true;
4132 rat = wi::neg (rat);
4135 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4136 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4137 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4138 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4139 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4140 if (!use->iv->no_overflow
4141 && !cand->iv->no_overflow
4142 && !integer_pow2p (cstep))
4143 return NULL_TREE;
4145 int bits = wi::exact_log2 (rat);
4146 if (bits == -1)
4147 bits = wi::floor_log2 (rat) + 1;
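/* BITS is now (at least) the number of bits needed to represent RAT; if
   CAND may wrap, USE's value range times RAT must still fit in CAND's
   precision for the division below to recover the USE value.  */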
4148 if (!cand->iv->no_overflow
4149 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4150 return NULL_TREE;
4152 var = var_at_stmt (loop, cand, at);
4154 if (POINTER_TYPE_P (ctype))
4156 ctype = unsigned_type_for (ctype);
4157 cbase = fold_convert (ctype, cbase);
4158 cstep = fold_convert (ctype, cstep);
4159 var = fold_convert (ctype, var);
4162 if (stmt_after_increment (loop, cand, at))
4163 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4164 unshare_expr (cstep));
4166 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4167 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4168 wide_int_to_tree (TREE_TYPE (var), rat));
4169 if (POINTER_TYPE_P (utype))
4171 var = fold_convert (sizetype, var);
4172 if (neg_p)
4173 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4174 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4176 else
4178 var = fold_convert (utype, var);
4179 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4180 ubase, var);
4182 return var;
4185 /* Adjust the cost COST for being in loop setup rather than loop body.
4186 If we're optimizing for space, the loop setup overhead is constant;
4187 if we're optimizing for speed, amortize it over the per-iteration cost.
4188 If ROUND_UP_P is true, the result is rounded up rather than truncated
4189 toward zero when optimizing for speed. */
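/* For example, with an average of 10 iterations and a setup cost of 25,
   the adjusted cost is 25 / 10 = 2 when truncating, or (25 + 9) / 10 = 3
   when ROUND_UP_P is true.  */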
4190 static int64_t
4191 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4192 bool round_up_p = false)
4194 if (cost == INFTY)
4195 return cost;
4196 else if (optimize_loop_for_speed_p (data->current_loop))
4198 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4199 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4201 else
4202 return cost;
4205 /* Calculate the speed or size cost (per SPEED) of shiftadd EXPR in MODE.
4206 MULT is the EXPR operand that is a multiplication by a power of two, i.e.
4207 the shift. COST0 and COST1 are the costs for calculating the operands of
4208 EXPR. Returns true if successful, and returns the cost in COST. */
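/* For example, for EXPR = a + b * 8, MULT is the b * 8 operand and M is 3;
   the resulting cost is the cheaper of a single shift-and-add instruction
   and a shift followed by an add.  */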
4210 static bool
4211 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4212 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4214 comp_cost res;
4215 tree op1 = TREE_OPERAND (expr, 1);
4216 tree cst = TREE_OPERAND (mult, 1);
4217 tree multop = TREE_OPERAND (mult, 0);
4218 int m = exact_log2 (int_cst_value (cst));
4219 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4220 int as_cost, sa_cost;
4221 bool mult_in_op1;
4223 if (!(m >= 0 && m < maxm))
4224 return false;
4226 STRIP_NOPS (op1);
4227 mult_in_op1 = operand_equal_p (op1, mult, 0);
4229 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4231 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4232 use that in preference to a shift insn followed by an add insn. */
4233 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4234 ? shiftadd_cost (speed, mode, m)
4235 : (mult_in_op1
4236 ? shiftsub1_cost (speed, mode, m)
4237 : shiftsub0_cost (speed, mode, m)));
4239 res = comp_cost (MIN (as_cost, sa_cost), 0);
4240 res += (mult_in_op1 ? cost0 : cost1);
4242 STRIP_NOPS (multop);
4243 if (!is_gimple_val (multop))
4244 res += force_expr_to_var_cost (multop, speed);
4246 *cost = res;
4247 return true;
4250 /* Estimates cost of forcing expression EXPR into a variable. */
4252 static comp_cost
4253 force_expr_to_var_cost (tree expr, bool speed)
4255 static bool costs_initialized = false;
4256 static unsigned integer_cost [2];
4257 static unsigned symbol_cost [2];
4258 static unsigned address_cost [2];
4259 tree op0, op1;
4260 comp_cost cost0, cost1, cost;
4261 machine_mode mode;
4262 scalar_int_mode int_mode;
4264 if (!costs_initialized)
4266 tree type = build_pointer_type (integer_type_node);
4267 tree var, addr;
4268 rtx x;
4269 int i;
4271 var = create_tmp_var_raw (integer_type_node, "test_var");
4272 TREE_STATIC (var) = 1;
4273 x = produce_memory_decl_rtl (var, NULL);
4274 SET_DECL_RTL (var, x);
4276 addr = build1 (ADDR_EXPR, type, var);
4279 for (i = 0; i < 2; i++)
4281 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4282 2000), i);
4284 symbol_cost[i] = computation_cost (addr, i) + 1;
4286 address_cost[i]
4287 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4288 if (dump_file && (dump_flags & TDF_DETAILS))
4290 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4291 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4292 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4293 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4294 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4295 fprintf (dump_file, "\n");
4299 costs_initialized = true;
4302 STRIP_NOPS (expr);
4304 if (SSA_VAR_P (expr))
4305 return no_cost;
4307 if (is_gimple_min_invariant (expr))
4309 if (poly_int_tree_p (expr))
4310 return comp_cost (integer_cost [speed], 0);
4312 if (TREE_CODE (expr) == ADDR_EXPR)
4314 tree obj = TREE_OPERAND (expr, 0);
4316 if (VAR_P (obj)
4317 || TREE_CODE (obj) == PARM_DECL
4318 || TREE_CODE (obj) == RESULT_DECL)
4319 return comp_cost (symbol_cost [speed], 0);
4322 return comp_cost (address_cost [speed], 0);
4325 switch (TREE_CODE (expr))
4327 case POINTER_PLUS_EXPR:
4328 case PLUS_EXPR:
4329 case MINUS_EXPR:
4330 case MULT_EXPR:
4331 case TRUNC_DIV_EXPR:
4332 case BIT_AND_EXPR:
4333 case BIT_IOR_EXPR:
4334 case LSHIFT_EXPR:
4335 case RSHIFT_EXPR:
4336 op0 = TREE_OPERAND (expr, 0);
4337 op1 = TREE_OPERAND (expr, 1);
4338 STRIP_NOPS (op0);
4339 STRIP_NOPS (op1);
4340 break;
4342 CASE_CONVERT:
4343 case NEGATE_EXPR:
4344 case BIT_NOT_EXPR:
4345 op0 = TREE_OPERAND (expr, 0);
4346 STRIP_NOPS (op0);
4347 op1 = NULL_TREE;
4348 break;
4349 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4350 introduce a COND_EXPR for the IV base, so we need to support better cost
4351 estimation for this COND_EXPR and for tcc_comparison codes. */
4352 case COND_EXPR:
4353 op0 = TREE_OPERAND (expr, 1);
4354 STRIP_NOPS (op0);
4355 op1 = TREE_OPERAND (expr, 2);
4356 STRIP_NOPS (op1);
4357 break;
4358 case LT_EXPR:
4359 case LE_EXPR:
4360 case GT_EXPR:
4361 case GE_EXPR:
4362 case EQ_EXPR:
4363 case NE_EXPR:
4364 case UNORDERED_EXPR:
4365 case ORDERED_EXPR:
4366 case UNLT_EXPR:
4367 case UNLE_EXPR:
4368 case UNGT_EXPR:
4369 case UNGE_EXPR:
4370 case UNEQ_EXPR:
4371 case LTGT_EXPR:
4372 case MAX_EXPR:
4373 case MIN_EXPR:
4374 op0 = TREE_OPERAND (expr, 0);
4375 STRIP_NOPS (op0);
4376 op1 = TREE_OPERAND (expr, 1);
4377 STRIP_NOPS (op1);
4378 break;
4380 default:
4381 /* Just an arbitrary value, FIXME. */
4382 return comp_cost (target_spill_cost[speed], 0);
4385 if (op0 == NULL_TREE
4386 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4387 cost0 = no_cost;
4388 else
4389 cost0 = force_expr_to_var_cost (op0, speed);
4391 if (op1 == NULL_TREE
4392 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4393 cost1 = no_cost;
4394 else
4395 cost1 = force_expr_to_var_cost (op1, speed);
4397 mode = TYPE_MODE (TREE_TYPE (expr));
4398 switch (TREE_CODE (expr))
4400 case POINTER_PLUS_EXPR:
4401 case PLUS_EXPR:
4402 case MINUS_EXPR:
4403 case NEGATE_EXPR:
4404 cost = comp_cost (add_cost (speed, mode), 0);
4405 if (TREE_CODE (expr) != NEGATE_EXPR)
4407 tree mult = NULL_TREE;
4408 comp_cost sa_cost;
4409 if (TREE_CODE (op1) == MULT_EXPR)
4410 mult = op1;
4411 else if (TREE_CODE (op0) == MULT_EXPR)
4412 mult = op0;
4414 if (mult != NULL_TREE
4415 && is_a <scalar_int_mode> (mode, &int_mode)
4416 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4417 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4418 speed, &sa_cost))
4419 return sa_cost;
4421 break;
4423 CASE_CONVERT:
4425 tree inner_mode, outer_mode;
4426 outer_mode = TREE_TYPE (expr);
4427 inner_mode = TREE_TYPE (op0);
4428 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4429 TYPE_MODE (inner_mode), speed), 0);
4431 break;
4433 case MULT_EXPR:
4434 if (cst_and_fits_in_hwi (op0))
4435 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4436 mode, speed), 0);
4437 else if (cst_and_fits_in_hwi (op1))
4438 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4439 mode, speed), 0);
4440 else
4441 return comp_cost (target_spill_cost [speed], 0);
4442 break;
4444 case TRUNC_DIV_EXPR:
4445 /* Division by power of two is usually cheap, so we allow it. Forbid
4446 anything else. */
4447 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4448 cost = comp_cost (add_cost (speed, mode), 0);
4449 else
4450 cost = comp_cost (target_spill_cost[speed], 0);
4451 break;
4453 case BIT_AND_EXPR:
4454 case BIT_IOR_EXPR:
4455 case BIT_NOT_EXPR:
4456 case LSHIFT_EXPR:
4457 case RSHIFT_EXPR:
4458 cost = comp_cost (add_cost (speed, mode), 0);
4459 break;
4460 case COND_EXPR:
4461 op0 = TREE_OPERAND (expr, 0);
4462 STRIP_NOPS (op0);
4463 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4464 || CONSTANT_CLASS_P (op0))
4465 cost = no_cost;
4466 else
4467 cost = force_expr_to_var_cost (op0, speed);
4468 break;
4469 case LT_EXPR:
4470 case LE_EXPR:
4471 case GT_EXPR:
4472 case GE_EXPR:
4473 case EQ_EXPR:
4474 case NE_EXPR:
4475 case UNORDERED_EXPR:
4476 case ORDERED_EXPR:
4477 case UNLT_EXPR:
4478 case UNLE_EXPR:
4479 case UNGT_EXPR:
4480 case UNGE_EXPR:
4481 case UNEQ_EXPR:
4482 case LTGT_EXPR:
4483 case MAX_EXPR:
4484 case MIN_EXPR:
4485 /* Simply use the add cost for now; FIXME: use a more accurate cost
4486 evaluation if one becomes available. */
4487 cost = comp_cost (add_cost (speed, mode), 0);
4488 break;
4490 default:
4491 gcc_unreachable ();
4494 cost += cost0;
4495 cost += cost1;
4496 return cost;
4499 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4500 invariants the computation depends on. */
4502 static comp_cost
4503 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4505 if (!expr)
4506 return no_cost;
4508 find_inv_vars (data, &expr, inv_vars);
4509 return force_expr_to_var_cost (expr, data->speed);
4512 /* Returns the cost of an auto-modifying address expression of the form
4513 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4514 is the offset of the address expression. The address expression has
4515 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED is
4516 true if we are optimizing for speed rather than size. */
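/* For example, a memory access of size 4 with AINC_STEP 4 and AINC_OFFSET 0
   matches the post-increment form (*p++), while AINC_OFFSET 4 matches the
   pre-increment form (*++p), provided the target supports those forms.  */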
4518 enum ainc_type
4520 AINC_PRE_INC, /* Pre increment. */
4521 AINC_PRE_DEC, /* Pre decrement. */
4522 AINC_POST_INC, /* Post increment. */
4523 AINC_POST_DEC, /* Post decrement. */
4524 AINC_NONE /* Also the number of auto increment types. */
4527 struct ainc_cost_data
4529 int64_t costs[AINC_NONE];
4532 static comp_cost
4533 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4534 machine_mode addr_mode, machine_mode mem_mode,
4535 addr_space_t as, bool speed)
4537 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4538 && !USE_STORE_PRE_DECREMENT (mem_mode)
4539 && !USE_LOAD_POST_DECREMENT (mem_mode)
4540 && !USE_STORE_POST_DECREMENT (mem_mode)
4541 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4542 && !USE_STORE_PRE_INCREMENT (mem_mode)
4543 && !USE_LOAD_POST_INCREMENT (mem_mode)
4544 && !USE_STORE_POST_INCREMENT (mem_mode))
4545 return infinite_cost;
4547 static vec<ainc_cost_data *> ainc_cost_data_list;
4548 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4549 if (idx >= ainc_cost_data_list.length ())
4551 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4553 gcc_assert (nsize > idx);
4554 ainc_cost_data_list.safe_grow_cleared (nsize);
4557 ainc_cost_data *data = ainc_cost_data_list[idx];
4558 if (data == NULL)
4560 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4562 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4563 data->costs[AINC_PRE_DEC] = INFTY;
4564 data->costs[AINC_POST_DEC] = INFTY;
4565 data->costs[AINC_PRE_INC] = INFTY;
4566 data->costs[AINC_POST_INC] = INFTY;
4567 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4568 || USE_STORE_PRE_DECREMENT (mem_mode))
4570 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4572 if (memory_address_addr_space_p (mem_mode, addr, as))
4573 data->costs[AINC_PRE_DEC]
4574 = address_cost (addr, mem_mode, as, speed);
4576 if (USE_LOAD_POST_DECREMENT (mem_mode)
4577 || USE_STORE_POST_DECREMENT (mem_mode))
4579 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4581 if (memory_address_addr_space_p (mem_mode, addr, as))
4582 data->costs[AINC_POST_DEC]
4583 = address_cost (addr, mem_mode, as, speed);
4585 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4586 || USE_STORE_PRE_INCREMENT (mem_mode))
4588 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4590 if (memory_address_addr_space_p (mem_mode, addr, as))
4591 data->costs[AINC_PRE_INC]
4592 = address_cost (addr, mem_mode, as, speed);
4594 if (USE_LOAD_POST_INCREMENT (mem_mode)
4595 || USE_STORE_POST_INCREMENT (mem_mode))
4597 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4599 if (memory_address_addr_space_p (mem_mode, addr, as))
4600 data->costs[AINC_POST_INC]
4601 = address_cost (addr, mem_mode, as, speed);
4603 ainc_cost_data_list[idx] = data;
4606 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4607 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4608 return comp_cost (data->costs[AINC_POST_INC], 0);
4609 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4610 return comp_cost (data->costs[AINC_POST_DEC], 0);
4611 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4612 return comp_cost (data->costs[AINC_PRE_INC], 0);
4613 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4614 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4616 return infinite_cost;
4619 /* Return the cost of computing USE's address expression by using CAND.
4620 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4621 address expression, respectively. If AFF_INV is simple, store
4622 the loop invariant variables it depends on in INV_VARS;
4623 if AFF_INV is complicated, handle it as a new invariant expression
4624 and record it in INV_EXPR. RATIO is the multiple between the
4625 steps of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4626 value in it indicating whether this is an auto-increment address. */
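/* Roughly, the function tries progressively richer addressing forms:
   "base + index", "base + index << scale", "base + index [<< scale] + offset"
   and "symbol + base + index [<< scale] [+ offset]", falling back to
   computing the invariant part outside the address expression when a form
   is not valid for the target.  */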
4628 static comp_cost
4629 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4630 struct iv_cand *cand, aff_tree *aff_inv,
4631 aff_tree *aff_var, HOST_WIDE_INT ratio,
4632 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4633 bool *can_autoinc, bool speed)
4635 rtx addr;
4636 bool simple_inv = true;
4637 tree comp_inv = NULL_TREE, type = aff_var->type;
4638 comp_cost var_cost = no_cost, cost = no_cost;
4639 struct mem_address parts = {NULL_TREE, integer_one_node,
4640 NULL_TREE, NULL_TREE, NULL_TREE};
4641 machine_mode addr_mode = TYPE_MODE (type);
4642 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4643 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4644 /* Only true if ratio != 1. */
4645 bool ok_with_ratio_p = false;
4646 bool ok_without_ratio_p = false;
4648 if (!aff_combination_const_p (aff_inv))
4650 parts.index = integer_one_node;
4651 /* Addressing mode "base + index". */
4652 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4653 if (ratio != 1)
4655 parts.step = wide_int_to_tree (type, ratio);
4656 /* Addressing mode "base + index << scale". */
4657 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4658 if (!ok_with_ratio_p)
4659 parts.step = NULL_TREE;
4661 if (ok_with_ratio_p || ok_without_ratio_p)
4663 if (maybe_ne (aff_inv->offset, 0))
4665 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4666 /* Addressing mode "base + index [<< scale] + offset". */
4667 if (!valid_mem_ref_p (mem_mode, as, &parts))
4668 parts.offset = NULL_TREE;
4669 else
4670 aff_inv->offset = 0;
4673 move_fixed_address_to_symbol (&parts, aff_inv);
4674 /* Base is fixed address and is moved to symbol part. */
4675 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4676 parts.base = NULL_TREE;
4678 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4679 if (parts.symbol != NULL_TREE
4680 && !valid_mem_ref_p (mem_mode, as, &parts))
4682 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4683 parts.symbol = NULL_TREE;
4684 /* Reset SIMPLE_INV since symbol address needs to be computed
4685 outside of address expression in this case. */
4686 simple_inv = false;
4687 /* Symbol part is moved back to the base part; the base can't be NULL. */
4688 parts.base = integer_one_node;
4691 else
4692 parts.index = NULL_TREE;
4694 else
4696 poly_int64 ainc_step;
4697 if (can_autoinc
4698 && ratio == 1
4699 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4701 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4703 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4704 ainc_offset += ainc_step;
4705 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4706 addr_mode, mem_mode, as, speed);
4707 if (!cost.infinite_cost_p ())
4709 *can_autoinc = true;
4710 return cost;
4712 cost = no_cost;
4714 if (!aff_combination_zero_p (aff_inv))
4716 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4717 /* Addressing mode "base + offset". */
4718 if (!valid_mem_ref_p (mem_mode, as, &parts))
4719 parts.offset = NULL_TREE;
4720 else
4721 aff_inv->offset = 0;
4725 if (simple_inv)
4726 simple_inv = (aff_inv == NULL
4727 || aff_combination_const_p (aff_inv)
4728 || aff_combination_singleton_var_p (aff_inv));
4729 if (!aff_combination_zero_p (aff_inv))
4730 comp_inv = aff_combination_to_tree (aff_inv);
4731 if (comp_inv != NULL_TREE)
4732 cost = force_var_cost (data, comp_inv, inv_vars);
4733 if (ratio != 1 && parts.step == NULL_TREE)
4734 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4735 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4736 var_cost += add_cost (speed, addr_mode);
4738 if (comp_inv && inv_expr && !simple_inv)
4740 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4741 /* Clear depends on. */
4742 if (*inv_expr != NULL && inv_vars && *inv_vars)
4743 bitmap_clear (*inv_vars);
4745 /* The cost of a small invariant expression, once adjusted against the loop
4746 niters, is usually zero, which makes it hard to distinguish from a
4747 candidate based on loop invariant variables. Secondly, the
4748 generated invariant expression may not be hoisted out of the loop by a
4749 following pass. We penalize the cost by rounding up in order to
4750 neutralize such effects. */
4751 cost.cost = adjust_setup_cost (data, cost.cost, true);
4752 cost.scratch = cost.cost;
4755 cost += var_cost;
4756 addr = addr_for_mem_ref (&parts, as, false);
4757 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4758 cost += address_cost (addr, mem_mode, as, speed);
4760 if (parts.symbol != NULL_TREE)
4761 cost.complexity += 1;
4762 /* Don't increase the complexity of adding a scaled index if it's
4763 the only kind of index that the target allows. */
4764 if (parts.step != NULL_TREE && ok_without_ratio_p)
4765 cost.complexity += 1;
4766 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4767 cost.complexity += 1;
4768 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4769 cost.complexity += 1;
4771 return cost;
4774 /* Scale (multiply) the computed COST (except the scratch part, which should
4775 be hoisted out of the loop) by header->frequency / AT->frequency, which
4776 makes the expected cost more accurate. */
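/* For example, with COST.cost 10, COST.scratch 4 and a scale factor of 3,
   the scaled cost is 4 + (10 - 4) * 3 = 22.  */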
4778 static comp_cost
4779 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4781 if (data->speed
4782 && data->current_loop->header->count.to_frequency (cfun) > 0)
4784 basic_block bb = gimple_bb (at);
4785 gcc_assert (cost.scratch <= cost.cost);
4786 int scale_factor = (int)(intptr_t) bb->aux;
4787 if (scale_factor == 1)
4788 return cost;
4790 int64_t scaled_cost
4791 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4793 if (dump_file && (dump_flags & TDF_DETAILS))
4794 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4795 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4796 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4798 cost.cost = scaled_cost;
4801 return cost;
4804 /* Determines the cost of the computation by which USE is expressed
4805 from induction variable CAND. If ADDRESS_P is true, we just need
4806 to create an address from it, otherwise we want to get it into a
4807 register. A set of invariants we depend on is stored in INV_VARS.
4808 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4809 addressing is likely. If INV_EXPR is nonnull, record the invariant
4810 expression entry in it. */
4812 static comp_cost
4813 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4814 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4815 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4817 gimple *at = use->stmt;
4818 tree ubase = use->iv->base, cbase = cand->iv->base;
4819 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4820 tree comp_inv = NULL_TREE;
4821 HOST_WIDE_INT ratio, aratio;
4822 comp_cost cost;
4823 widest_int rat;
4824 aff_tree aff_inv, aff_var;
4825 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4827 if (inv_vars)
4828 *inv_vars = NULL;
4829 if (can_autoinc)
4830 *can_autoinc = false;
4831 if (inv_expr)
4832 *inv_expr = NULL;
4834 /* Check if we have enough precision to express the values of use. */
4835 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4836 return infinite_cost;
4838 if (address_p
4839 || (use->iv->base_object
4840 && cand->iv->base_object
4841 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4842 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4844 /* Do not try to express the address of an object with a computation based
4845 on the address of a different object. This may cause problems in RTL
4846 level alias analysis (which does not expect this to happen,
4847 as it is illegal in C), and would be unlikely to be useful
4848 anyway. */
4849 if (use->iv->base_object
4850 && cand->iv->base_object
4851 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4852 return infinite_cost;
4855 if (!get_computation_aff_1 (data->current_loop, at, use,
4856 cand, &aff_inv, &aff_var, &rat)
4857 || !wi::fits_shwi_p (rat))
4858 return infinite_cost;
4860 ratio = rat.to_shwi ();
4861 if (address_p)
4863 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4864 inv_vars, inv_expr, can_autoinc, speed);
4865 cost = get_scaled_computation_cost_at (data, at, cost);
4866 /* For doloop IV cand, add on the extra cost. */
4867 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4868 return cost;
4871 bool simple_inv = (aff_combination_const_p (&aff_inv)
4872 || aff_combination_singleton_var_p (&aff_inv));
4873 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4874 aff_combination_convert (&aff_inv, signed_type);
4875 if (!aff_combination_zero_p (&aff_inv))
4876 comp_inv = aff_combination_to_tree (&aff_inv);
4878 cost = force_var_cost (data, comp_inv, inv_vars);
4879 if (comp_inv && inv_expr && !simple_inv)
4881 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4882 /* Clear depends on. */
4883 if (*inv_expr != NULL && inv_vars && *inv_vars)
4884 bitmap_clear (*inv_vars);
4886 cost.cost = adjust_setup_cost (data, cost.cost);
4887 /* Record setup cost in scratch field. */
4888 cost.scratch = cost.cost;
4890 /* The cost of a constant integer can be covered when adding the invariant
4891 part to the variant part. */
4892 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4893 cost = no_cost;
4895 /* Need type narrowing to represent use with cand. */
4896 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4898 machine_mode outer_mode = TYPE_MODE (utype);
4899 machine_mode inner_mode = TYPE_MODE (ctype);
4900 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4903 /* Turn a + i * (-c) into a - i * c. */
4904 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4905 aratio = -ratio;
4906 else
4907 aratio = ratio;
4909 if (ratio != 1)
4910 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4912 /* TODO: We may also need to check if we can compute a + i * 4 in one
4913 instruction. */
4914 /* Need to add up the invariant and variant parts. */
4915 if (comp_inv && !integer_zerop (comp_inv))
4916 cost += add_cost (speed, TYPE_MODE (utype));
4918 cost = get_scaled_computation_cost_at (data, at, cost);
4920 /* For doloop IV cand, add on the extra cost. */
4921 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4922 cost += targetm.doloop_cost_for_generic;
4924 return cost;
4927 /* Determines cost of computing the use in GROUP with CAND in a generic
4928 expression. */
4930 static bool
4931 determine_group_iv_cost_generic (struct ivopts_data *data,
4932 struct iv_group *group, struct iv_cand *cand)
4934 comp_cost cost;
4935 iv_inv_expr_ent *inv_expr = NULL;
4936 bitmap inv_vars = NULL, inv_exprs = NULL;
4937 struct iv_use *use = group->vuses[0];
4939 /* The simple case first -- if we need to express the value of the preserved
4940 original biv, the cost is 0. This also prevents us from counting the
4941 cost of the increment twice -- once at this use and once in the cost of
4942 the candidate. */
4943 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4944 cost = no_cost;
4945 else
4946 cost = get_computation_cost (data, use, cand, false,
4947 &inv_vars, NULL, &inv_expr);
4949 if (inv_expr)
4951 inv_exprs = BITMAP_ALLOC (NULL);
4952 bitmap_set_bit (inv_exprs, inv_expr->id);
4954 set_group_iv_cost (data, group, cand, cost, inv_vars,
4955 NULL_TREE, ERROR_MARK, inv_exprs);
4956 return !cost.infinite_cost_p ();
4959 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4961 static bool
4962 determine_group_iv_cost_address (struct ivopts_data *data,
4963 struct iv_group *group, struct iv_cand *cand)
4965 unsigned i;
4966 bitmap inv_vars = NULL, inv_exprs = NULL;
4967 bool can_autoinc;
4968 iv_inv_expr_ent *inv_expr = NULL;
4969 struct iv_use *use = group->vuses[0];
4970 comp_cost sum_cost = no_cost, cost;
4972 cost = get_computation_cost (data, use, cand, true,
4973 &inv_vars, &can_autoinc, &inv_expr);
4975 if (inv_expr)
4977 inv_exprs = BITMAP_ALLOC (NULL);
4978 bitmap_set_bit (inv_exprs, inv_expr->id);
4980 sum_cost = cost;
4981 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4983 if (can_autoinc)
4984 sum_cost -= cand->cost_step;
4985 /* If we generated the candidate solely for exploiting autoincrement
4986 opportunities, and it turns out it can't be used, set the cost to
4987 infinity to make sure we ignore it. */
4988 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4989 sum_cost = infinite_cost;
4992 /* Uses in a group can share setup code, so only add setup cost once. */
4993 cost -= cost.scratch;
4994 /* Compute and add costs for the rest of the uses of this group. */
4995 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4997 struct iv_use *next = group->vuses[i];
4999 /* TODO: We could skip computing cost for sub iv_use when it has the
5000 same cost as the first iv_use, but the cost really depends on the
5001 offset and where the iv_use is. */
5002 cost = get_computation_cost (data, next, cand, true,
5003 NULL, &can_autoinc, &inv_expr);
5004 if (inv_expr)
5006 if (!inv_exprs)
5007 inv_exprs = BITMAP_ALLOC (NULL);
5009 bitmap_set_bit (inv_exprs, inv_expr->id);
5011 sum_cost += cost;
5013 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5014 NULL_TREE, ERROR_MARK, inv_exprs);
5016 return !sum_cost.infinite_cost_p ();
5019 /* Computes value of candidate CAND at position AT in iteration NITER, and
5020 stores it to VAL. */
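/* That is, VAL = BASE + NITER * STEP, plus one extra STEP if AT follows
   the increment of CAND.  */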
5022 static void
5023 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5024 aff_tree *val)
5026 aff_tree step, delta, nit;
5027 struct iv *iv = cand->iv;
5028 tree type = TREE_TYPE (iv->base);
5029 tree steptype;
5030 if (POINTER_TYPE_P (type))
5031 steptype = sizetype;
5032 else
5033 steptype = unsigned_type_for (type);
5035 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5036 aff_combination_convert (&step, steptype);
5037 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5038 aff_combination_convert (&nit, steptype);
5039 aff_combination_mult (&nit, &step, &delta);
5040 if (stmt_after_increment (loop, cand, at))
5041 aff_combination_add (&delta, &step);
5043 tree_to_aff_combination (iv->base, type, val);
5044 if (!POINTER_TYPE_P (type))
5045 aff_combination_convert (val, steptype);
5046 aff_combination_add (val, &delta);
5049 /* Returns period of induction variable iv. */
5051 static tree
5052 iv_period (struct iv *iv)
5054 tree step = iv->step, period, type;
5055 tree pow2div;
5057 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5059 type = unsigned_type_for (TREE_TYPE (step));
5060 /* Period of the iv is lcm (step, type_range)/step - 1,
5061 i.e., N*type_range/step - 1. Since type_range is a power
5062 of two, N == step >> num_of_ending_zeros_binary (step),
5063 so the final result is
5065 (type_range >> num_of_ending_zeros_binary (step)) - 1
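For example, for a 16-bit unsigned type and step 12 (two trailing
zero bits), the period is (65536 >> 2) - 1 = 16383.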
5068 pow2div = num_ending_zeros (step);
5070 period = build_low_bits_mask (type,
5071 (TYPE_PRECISION (type)
5072 - tree_to_uhwi (pow2div)));
5074 return period;
5077 /* Returns the comparison operator used when eliminating the iv USE. */
5079 static enum tree_code
5080 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5082 class loop *loop = data->current_loop;
5083 basic_block ex_bb;
5084 edge exit;
5086 ex_bb = gimple_bb (use->stmt);
5087 exit = EDGE_SUCC (ex_bb, 0);
5088 if (flow_bb_inside_loop_p (loop, exit->dest))
5089 exit = EDGE_SUCC (ex_bb, 1);
5091 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5094 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5095 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5096 calculation is performed in non-wrapping type.
5098 TODO: More generally, we could test for the situation that
5099 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5100 This would require knowing the sign of OFFSET. */
5102 static bool
5103 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5105 enum tree_code code;
5106 tree e1, e2;
5107 aff_tree aff_e1, aff_e2, aff_offset;
5109 if (!nowrap_type_p (TREE_TYPE (base)))
5110 return false;
5112 base = expand_simple_operations (base);
5114 if (TREE_CODE (base) == SSA_NAME)
5116 gimple *stmt = SSA_NAME_DEF_STMT (base);
5118 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5119 return false;
5121 code = gimple_assign_rhs_code (stmt);
5122 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5123 return false;
5125 e1 = gimple_assign_rhs1 (stmt);
5126 e2 = gimple_assign_rhs2 (stmt);
5128 else
5130 code = TREE_CODE (base);
5131 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5132 return false;
5133 e1 = TREE_OPERAND (base, 0);
5134 e2 = TREE_OPERAND (base, 1);
5137 /* Use affine expansion as deeper inspection to prove the equality. */
5138 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5139 &aff_e2, &data->name_expansion_cache);
5140 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5141 &aff_offset, &data->name_expansion_cache);
5142 aff_combination_scale (&aff_offset, -1);
5143 switch (code)
5145 case PLUS_EXPR:
5146 aff_combination_add (&aff_e2, &aff_offset);
5147 if (aff_combination_zero_p (&aff_e2))
5148 return true;
5150 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5151 &aff_e1, &data->name_expansion_cache);
5152 aff_combination_add (&aff_e1, &aff_offset);
5153 return aff_combination_zero_p (&aff_e1);
5155 case POINTER_PLUS_EXPR:
5156 aff_combination_add (&aff_e2, &aff_offset);
5157 return aff_combination_zero_p (&aff_e2);
5159 default:
5160 return false;
5164 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5165 comparison with CAND. NITER describes the number of iterations of
5166 the loops. If successful, the comparison in COMP_P is altered accordingly.
5168 We aim to handle the following situation:
5170 sometype *base, *p;
5171 int a, b, i;
5173 i = a;
5174 p = p_0 = base + a;
5178 bla (*p);
5179 p++;
5180 i++;
5182 while (i < b);
5184 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5185 We aim to optimize this to
5187 p = p_0 = base + a;
5190 bla (*p);
5191 p++;
5193 while (p < p_0 - a + b);
5195 This preserves correctness, since the pointer arithmetic does not
5196 overflow. More precisely:
5198 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5199 overflow in computing it or the values of p.
5200 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5201 overflow. To prove this, we use the fact that p_0 = base + a. */
5203 static bool
5204 iv_elimination_compare_lt (struct ivopts_data *data,
5205 struct iv_cand *cand, enum tree_code *comp_p,
5206 class tree_niter_desc *niter)
5208 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5209 class aff_tree nit, tmpa, tmpb;
5210 enum tree_code comp;
5211 HOST_WIDE_INT step;
5213 /* We need to know that the candidate induction variable does not overflow.
5214 While more complex analysis may be used to prove this, for now just
5215 check that the variable appears in the original program and that it
5216 is computed in a type that guarantees no overflows. */
5217 cand_type = TREE_TYPE (cand->iv->base);
5218 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5219 return false;
5221 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5222 the calculation of the BOUND could overflow, making the comparison
5223 invalid. */
5224 if (!data->loop_single_exit_p)
5225 return false;
5227 /* We need to be able to decide whether candidate is increasing or decreasing
5228 in order to choose the right comparison operator. */
5229 if (!cst_and_fits_in_hwi (cand->iv->step))
5230 return false;
5231 step = int_cst_value (cand->iv->step);
5233 /* Check that the number of iterations matches the expected pattern:
5234 a + 1 > b ? 0 : b - a - 1. */
5235 mbz = niter->may_be_zero;
5236 if (TREE_CODE (mbz) == GT_EXPR)
5238 /* Handle a + 1 > b. */
5239 tree op0 = TREE_OPERAND (mbz, 0);
5240 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5242 a = TREE_OPERAND (op0, 0);
5243 b = TREE_OPERAND (mbz, 1);
5245 else
5246 return false;
5248 else if (TREE_CODE (mbz) == LT_EXPR)
5250 tree op1 = TREE_OPERAND (mbz, 1);
5252 /* Handle b < a + 1. */
5253 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5255 a = TREE_OPERAND (op1, 0);
5256 b = TREE_OPERAND (mbz, 0);
5258 else
5259 return false;
5261 else
5262 return false;
5264 /* Expected number of iterations is B - A - 1. Check that it matches
5265 the actual number, i.e., that B - A - NITER = 1. */
5266 tree_to_aff_combination (niter->niter, nit_type, &nit);
5267 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5268 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5269 aff_combination_scale (&nit, -1);
5270 aff_combination_scale (&tmpa, -1);
5271 aff_combination_add (&tmpb, &tmpa);
5272 aff_combination_add (&tmpb, &nit);
5273 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5274 return false;
5276 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5277 overflow. */
5278 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5279 cand->iv->step,
5280 fold_convert (TREE_TYPE (cand->iv->step), a));
5281 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5282 return false;
5284 /* Determine the new comparison operator. */
5285 comp = step < 0 ? GT_EXPR : LT_EXPR;
5286 if (*comp_p == NE_EXPR)
5287 *comp_p = comp;
5288 else if (*comp_p == EQ_EXPR)
5289 *comp_p = invert_tree_comparison (comp, false);
5290 else
5291 gcc_unreachable ();
5293 return true;
5296 /* Check whether it is possible to express the condition in USE by comparison
5297 of candidate CAND. If so, store the value compared with to BOUND, and the
5298 comparison operator to COMP. */
5300 static bool
5301 may_eliminate_iv (struct ivopts_data *data,
5302 struct iv_use *use, struct iv_cand *cand, tree *bound,
5303 enum tree_code *comp)
5305 basic_block ex_bb;
5306 edge exit;
5307 tree period;
5308 class loop *loop = data->current_loop;
5309 aff_tree bnd;
5310 class tree_niter_desc *desc = NULL;
5312 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5313 return false;
5315 /* For now, this works only for exits that dominate the loop latch.
5316 TODO: extend to other conditions inside loop body. */
5317 ex_bb = gimple_bb (use->stmt);
5318 if (use->stmt != last_stmt (ex_bb)
5319 || gimple_code (use->stmt) != GIMPLE_COND
5320 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5321 return false;
5323 exit = EDGE_SUCC (ex_bb, 0);
5324 if (flow_bb_inside_loop_p (loop, exit->dest))
5325 exit = EDGE_SUCC (ex_bb, 1);
5326 if (flow_bb_inside_loop_p (loop, exit->dest))
5327 return false;
5329 desc = niter_for_exit (data, exit);
5330 if (!desc)
5331 return false;
5333 /* Determine whether we can use the variable to test the exit condition.
5334 This is the case iff the period of the induction variable is greater
5335 than the number of iterations for which the exit condition is true. */
5336 period = iv_period (cand->iv);
5338 /* If the number of iterations is constant, compare against it directly. */
5339 if (TREE_CODE (desc->niter) == INTEGER_CST)
5341 /* See cand_value_at. */
5342 if (stmt_after_increment (loop, cand, use->stmt))
5344 if (!tree_int_cst_lt (desc->niter, period))
5345 return false;
5347 else
5349 if (tree_int_cst_lt (period, desc->niter))
5350 return false;
5354 /* If not, and if this is the only possible exit of the loop, see whether
5355 we can get a conservative estimate on the number of iterations of the
5356 entire loop and compare against that instead. */
5357 else
5359 widest_int period_value, max_niter;
5361 max_niter = desc->max;
5362 if (stmt_after_increment (loop, cand, use->stmt))
5363 max_niter += 1;
5364 period_value = wi::to_widest (period);
5365 if (wi::gtu_p (max_niter, period_value))
5367 /* See if we can take advantage of inferred loop bound
5368 information. */
5369 if (data->loop_single_exit_p)
5371 if (!max_loop_iterations (loop, &max_niter))
5372 return false;
5373 /* The loop bound is already adjusted by adding 1. */
5374 if (wi::gtu_p (max_niter, period_value))
5375 return false;
5377 else
5378 return false;
5382 /* For a doloop IV cand, the bound would be zero. It's safe whether
5383 may_be_zero is set or not. */
5384 if (cand->doloop_p)
5386 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5387 *comp = iv_elimination_compare (data, use);
5388 return true;
5391 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5393 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5394 aff_combination_to_tree (&bnd));
5395 *comp = iv_elimination_compare (data, use);
5397 /* It is unlikely that computing the number of iterations using division
5398 would be more profitable than keeping the original induction variable. */
5399 if (expression_expensive_p (*bound))
5400 return false;
5402 /* Sometimes it is possible to handle the situation where the number of
5403 iterations may be zero unless additional assumptions hold, by using <
5404 instead of != in the exit condition.
5406 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5407 base the exit condition on it. However, that is often too
5408 expensive. */
5409 if (!integer_zerop (desc->may_be_zero))
5410 return iv_elimination_compare_lt (data, cand, comp, desc);
5412 return true;
5415 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5416 be copied, if it is used in the loop body and DATA->body_includes_call. */
5418 static int
5419 parm_decl_cost (struct ivopts_data *data, tree bound)
5421 tree sbound = bound;
5422 STRIP_NOPS (sbound);
5424 if (TREE_CODE (sbound) == SSA_NAME
5425 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5426 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5427 && data->body_includes_call)
5428 return COSTS_N_INSNS (1);
5430 return 0;
5433 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5435 static bool
5436 determine_group_iv_cost_cond (struct ivopts_data *data,
5437 struct iv_group *group, struct iv_cand *cand)
5439 tree bound = NULL_TREE;
5440 struct iv *cmp_iv;
5441 bitmap inv_exprs = NULL;
5442 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5443 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5444 enum comp_iv_rewrite rewrite_type;
5445 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5446 tree *control_var, *bound_cst;
5447 enum tree_code comp = ERROR_MARK;
5448 struct iv_use *use = group->vuses[0];
5450 /* Extract condition operands. */
5451 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5452 &bound_cst, NULL, &cmp_iv);
5453 gcc_assert (rewrite_type != COMP_IV_NA);
5455 /* Try iv elimination. */
5456 if (rewrite_type == COMP_IV_ELIM
5457 && may_eliminate_iv (data, use, cand, &bound, &comp))
5459 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5460 if (elim_cost.cost == 0)
5461 elim_cost.cost = parm_decl_cost (data, bound);
5462 else if (TREE_CODE (bound) == INTEGER_CST)
5463 elim_cost.cost = 0;
5464 /* If we replace a loop condition 'i < n' with 'p < base + n',
5465 inv_vars_elim will have 'base' and 'n' set, which implies that both
5466 'base' and 'n' will be live during the loop. More likely,
5467 'base + n' will be loop invariant, resulting in only one live value
5468 during the loop. So in that case we clear inv_vars_elim and set
5469 inv_expr_elim instead. */
5470 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5472 inv_expr_elim = get_loop_invariant_expr (data, bound);
5473 bitmap_clear (inv_vars_elim);
5475 /* The bound is a loop invariant, so it will be only computed
5476 once. */
5477 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5480 /* When the condition is a comparison of the candidate IV against
5481 zero, prefer this IV.
5483 TODO: The constant that we're subtracting from the cost should
5484 be target-dependent. This information should be added to the
5485 target costs for each backend. */
5486 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5487 && integer_zerop (*bound_cst)
5488 && (operand_equal_p (*control_var, cand->var_after, 0)
5489 || operand_equal_p (*control_var, cand->var_before, 0)))
5490 elim_cost -= 1;
5492 express_cost = get_computation_cost (data, use, cand, false,
5493 &inv_vars_express, NULL,
5494 &inv_expr_express);
5495 if (cmp_iv != NULL)
5496 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5498 /* Count the cost of the original bound as well. */
5499 bound_cost = force_var_cost (data, *bound_cst, NULL);
5500 if (bound_cost.cost == 0)
5501 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5502 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5503 bound_cost.cost = 0;
5504 express_cost += bound_cost;
5506 /* Choose the better approach, preferring the eliminated IV. */
5507 if (elim_cost <= express_cost)
5509 cost = elim_cost;
5510 inv_vars = inv_vars_elim;
5511 inv_vars_elim = NULL;
5512 inv_expr = inv_expr_elim;
5513 /* For doloop candidate/use pair, adjust to zero cost. */
5514 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5515 cost = no_cost;
5517 else
5519 cost = express_cost;
5520 inv_vars = inv_vars_express;
5521 inv_vars_express = NULL;
5522 bound = NULL_TREE;
5523 comp = ERROR_MARK;
5524 inv_expr = inv_expr_express;
5527 if (inv_expr)
5529 inv_exprs = BITMAP_ALLOC (NULL);
5530 bitmap_set_bit (inv_exprs, inv_expr->id);
5532 set_group_iv_cost (data, group, cand, cost,
5533 inv_vars, bound, comp, inv_exprs);
5535 if (inv_vars_elim)
5536 BITMAP_FREE (inv_vars_elim);
5537 if (inv_vars_express)
5538 BITMAP_FREE (inv_vars_express);
5540 return !cost.infinite_cost_p ();
5543 /* Determines cost of computing uses in GROUP with CAND. Returns false
5544 if the uses cannot be represented with CAND. */
5546 static bool
5547 determine_group_iv_cost (struct ivopts_data *data,
5548 struct iv_group *group, struct iv_cand *cand)
5550 switch (group->type)
5552 case USE_NONLINEAR_EXPR:
5553 return determine_group_iv_cost_generic (data, group, cand);
5555 case USE_REF_ADDRESS:
5556 case USE_PTR_ADDRESS:
5557 return determine_group_iv_cost_address (data, group, cand);
5559 case USE_COMPARE:
5560 return determine_group_iv_cost_cond (data, group, cand);
5562 default:
5563 gcc_unreachable ();
5567 /* Return true if get_computation_cost indicates that autoincrement is
5568 a possibility for the pair of USE and CAND, false otherwise. */
5570 static bool
5571 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5572 struct iv_cand *cand)
5574 if (!address_p (use->type))
5575 return false;
5577 bool can_autoinc = false;
5578 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5579 return can_autoinc;
5582 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5583 use that allows autoincrement, and set their AINC_USE if possible. */
5585 static void
5586 set_autoinc_for_original_candidates (struct ivopts_data *data)
5588 unsigned i, j;
5590 for (i = 0; i < data->vcands.length (); i++)
5592 struct iv_cand *cand = data->vcands[i];
5593 struct iv_use *closest_before = NULL;
5594 struct iv_use *closest_after = NULL;
5595 if (cand->pos != IP_ORIGINAL)
5596 continue;
5598 for (j = 0; j < data->vgroups.length (); j++)
5600 struct iv_group *group = data->vgroups[j];
5601 struct iv_use *use = group->vuses[0];
5602 unsigned uid = gimple_uid (use->stmt);
5604 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5605 continue;
5607 if (uid < gimple_uid (cand->incremented_at)
5608 && (closest_before == NULL
5609 || uid > gimple_uid (closest_before->stmt)))
5610 closest_before = use;
5612 if (uid > gimple_uid (cand->incremented_at)
5613 && (closest_after == NULL
5614 || uid < gimple_uid (closest_after->stmt)))
5615 closest_after = use;
5618 if (closest_before != NULL
5619 && autoinc_possible_for_pair (data, closest_before, cand))
5620 cand->ainc_use = closest_before;
5621 else if (closest_after != NULL
5622 && autoinc_possible_for_pair (data, closest_after, cand))
5623 cand->ainc_use = closest_after;
5627 /* Relate compare use with all candidates. */
5629 static void
5630 relate_compare_use_with_all_cands (struct ivopts_data *data)
5632 unsigned i, count = data->vcands.length ();
5633 for (i = 0; i < data->vgroups.length (); i++)
5635 struct iv_group *group = data->vgroups[i];
5637 if (group->type == USE_COMPARE)
5638 bitmap_set_range (group->related_cands, 0, count);
5642 /* Add one doloop dedicated IV candidate:
5643 - Base is (may_be_zero ? 1 : (niter + 1)).
5644 - Step is -1. */
5646 static void
5647 add_iv_candidate_for_doloop (struct ivopts_data *data)
5649 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5650 gcc_assert (niter_desc && niter_desc->assumptions);
5652 tree niter = niter_desc->niter;
5653 tree ntype = TREE_TYPE (niter);
5654 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5656 tree may_be_zero = niter_desc->may_be_zero;
5657 if (may_be_zero && integer_zerop (may_be_zero))
5658 may_be_zero = NULL_TREE;
5659 if (may_be_zero)
5661 if (COMPARISON_CLASS_P (may_be_zero))
5663 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5664 build_int_cst (ntype, 0),
5665 rewrite_to_non_trapping_overflow (niter));
5667 /* Don't try to obtain the iteration count expression when may_be_zero is
5668 integer_nonzerop (the iteration count is then one) or anything else. */
5669 else
5670 return;
5673 tree base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5674 build_int_cst (ntype, 1));
5675 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5678 /* Finds the candidates for the induction variables. */
5680 static void
5681 find_iv_candidates (struct ivopts_data *data)
5683 /* Add commonly used ivs. */
5684 add_standard_iv_candidates (data);
5686 /* Add doloop dedicated ivs. */
5687 if (data->doloop_use_p)
5688 add_iv_candidate_for_doloop (data);
5690 /* Add old induction variables. */
5691 add_iv_candidate_for_bivs (data);
5693 /* Add induction variables derived from uses. */
5694 add_iv_candidate_for_groups (data);
5696 set_autoinc_for_original_candidates (data);
5698 /* Record the important candidates. */
5699 record_important_candidates (data);
5701 /* Relate compare iv_use with all candidates. */
5702 if (!data->consider_all_candidates)
5703 relate_compare_use_with_all_cands (data);
5705 if (dump_file && (dump_flags & TDF_DETAILS))
5707 unsigned i;
5709 fprintf (dump_file, "\n<Important Candidates>:\t");
5710 for (i = 0; i < data->vcands.length (); i++)
5711 if (data->vcands[i]->important)
5712 fprintf (dump_file, " %d,", data->vcands[i]->id);
5713 fprintf (dump_file, "\n");
5715 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5716 for (i = 0; i < data->vgroups.length (); i++)
5718 struct iv_group *group = data->vgroups[i];
5720 if (group->related_cands)
5722 fprintf (dump_file, " Group %d:\t", group->id);
5723 dump_bitmap (dump_file, group->related_cands);
5726 fprintf (dump_file, "\n");
5730 /* Determines costs of computing use of iv with an iv candidate. */
5732 static void
5733 determine_group_iv_costs (struct ivopts_data *data)
5735 unsigned i, j;
5736 struct iv_cand *cand;
5737 struct iv_group *group;
5738 bitmap to_clear = BITMAP_ALLOC (NULL);
5740 alloc_use_cost_map (data);
5742 for (i = 0; i < data->vgroups.length (); i++)
5744 group = data->vgroups[i];
5746 if (data->consider_all_candidates)
5748 for (j = 0; j < data->vcands.length (); j++)
5750 cand = data->vcands[j];
5751 determine_group_iv_cost (data, group, cand);
5754 else
5756 bitmap_iterator bi;
5758 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5760 cand = data->vcands[j];
5761 if (!determine_group_iv_cost (data, group, cand))
5762 bitmap_set_bit (to_clear, j);
5765 /* Remove the candidates for which the cost is infinite from
5766 the list of related candidates. */
5767 bitmap_and_compl_into (group->related_cands, to_clear);
5768 bitmap_clear (to_clear);
5772 BITMAP_FREE (to_clear);
5774 if (dump_file && (dump_flags & TDF_DETAILS))
5776 bitmap_iterator bi;
5778 /* Dump invariant variables. */
5779 fprintf (dump_file, "\n<Invariant Vars>:\n");
5780 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5782 struct version_info *info = ver_info (data, i);
5783 if (info->inv_id)
5785 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5786 print_generic_expr (dump_file, info->name, TDF_SLIM);
5787 fprintf (dump_file, "%s\n",
5788 info->has_nonlin_use ? "" : "\t(eliminable)");
5792 /* Dump invariant expressions. */
5793 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5794 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5796 for (hash_table<iv_inv_expr_hasher>::iterator it
5797 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5798 ++it)
5799 list.safe_push (*it);
5801 list.qsort (sort_iv_inv_expr_ent);
5803 for (i = 0; i < list.length (); ++i)
5805 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5806 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5807 fprintf (dump_file, "\n");
5810 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5812 for (i = 0; i < data->vgroups.length (); i++)
5814 group = data->vgroups[i];
5816 fprintf (dump_file, "Group %d:\n", i);
5817 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5818 for (j = 0; j < group->n_map_members; j++)
5820 if (!group->cost_map[j].cand
5821 || group->cost_map[j].cost.infinite_cost_p ())
5822 continue;
5824 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5825 group->cost_map[j].cand->id,
5826 group->cost_map[j].cost.cost,
5827 group->cost_map[j].cost.complexity);
5828 if (!group->cost_map[j].inv_exprs
5829 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5830 fprintf (dump_file, "NIL;\t");
5831 else
5832 bitmap_print (dump_file,
5833 group->cost_map[j].inv_exprs, "", ";\t");
5834 if (!group->cost_map[j].inv_vars
5835 || bitmap_empty_p (group->cost_map[j].inv_vars))
5836 fprintf (dump_file, "NIL;\n");
5837 else
5838 bitmap_print (dump_file,
5839 group->cost_map[j].inv_vars, "", "\n");
5842 fprintf (dump_file, "\n");
5844 fprintf (dump_file, "\n");
5848 /* Determines cost of the candidate CAND. */
5850 static void
5851 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5853 comp_cost cost_base;
5854 int64_t cost, cost_step;
5855 tree base;
5857 gcc_assert (cand->iv != NULL);
5859 /* There are two costs associated with the candidate -- its increment
5860 and its initialization. The second is almost negligible for any loop
5861 that rolls enough, so we take it into account only a little. */
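/* For example, when optimizing for speed with avg_loop_niter of 10, a base
   cost of 4 is amortized by adjust_setup_cost to 4 / 10 = 0, leaving
   essentially only the step cost.  */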
5863 base = cand->iv->base;
5864 cost_base = force_var_cost (data, base, NULL);
5865 /* It is rare for the iv register to happen to be initialized with the
5866 proper value at no cost. In general, there will at least be a regcopy
5867 or a const set. */
5868 if (cost_base.cost == 0)
5869 cost_base.cost = COSTS_N_INSNS (1);
5870 /* Doloop decrement should be considered as zero cost. */
5871 if (cand->doloop_p)
5872 cost_step = 0;
5873 else
5874 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5875 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5877 /* Prefer the original ivs unless we may gain something by replacing them.
5878 The reason is to make debugging simpler, so this is not relevant for
5879 artificial ivs created by other optimization passes. */
5880 if ((cand->pos != IP_ORIGINAL
5881 || !SSA_NAME_VAR (cand->var_before)
5882 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5883 /* Prefer doloop as well. */
5884 && !cand->doloop_p)
5885 cost++;
5887 /* Prefer not to insert statements into latch unless there are some
5888 already (so that we do not create unnecessary jumps). */
5889 if (cand->pos == IP_END
5890 && empty_block_p (ip_end_pos (data->current_loop)))
5891 cost++;
5893 cand->cost = cost;
5894 cand->cost_step = cost_step;
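/* Rough numeric sketch of the above (the figures are invented for
   illustration, not taken from any particular target): if add_cost for the
   step is, say, COSTS_N_INSNS (1) == 4, the base setup also costs 4, and
   the loop's average trip count is 4, adjust_setup_cost reduces the setup
   part to 4 / 4 == 1, giving cost == 4 + 1 == 5; one more unit is added
   unless the candidate is an original iv (or a doloop candidate), and
   another if it would force a statement into an empty latch.  */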
5897 /* Determines costs of computation of the candidates. */
5899 static void
5900 determine_iv_costs (struct ivopts_data *data)
5902 unsigned i;
5904 if (dump_file && (dump_flags & TDF_DETAILS))
5906 fprintf (dump_file, "<Candidate Costs>:\n");
5907 fprintf (dump_file, " cand\tcost\n");
5910 for (i = 0; i < data->vcands.length (); i++)
5912 struct iv_cand *cand = data->vcands[i];
5914 determine_iv_cost (data, cand);
5916 if (dump_file && (dump_flags & TDF_DETAILS))
5917 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5920 if (dump_file && (dump_flags & TDF_DETAILS))
5921 fprintf (dump_file, "\n");
5924 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5925 induction variables. Note N_INVS includes both invariant variables and
5926 invariant expressions. */
5928 static unsigned
5929 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5930 unsigned n_cands)
5932 unsigned cost;
5933 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5934 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5935 bool speed = data->speed;
5937 /* If there is a call in the loop body, the call-clobbered registers
5938 are not available for loop invariants. */
5939 if (data->body_includes_call)
5940 available_regs = available_regs - target_clobbered_regs;
5942 /* If we have enough registers. */
5943 if (regs_needed + target_res_regs < available_regs)
5944 cost = n_new;
5945 /* If close to running out of registers, try to preserve them. */
5946 else if (regs_needed <= available_regs)
5947 cost = target_reg_cost [speed] * regs_needed;
5948 /* If the registers needed exceed those available but the number of candidates
5949 does not, we penalize the extra registers using target_spill_cost. */
5950 else if (n_cands <= available_regs)
5951 cost = target_reg_cost [speed] * available_regs
5952 + target_spill_cost [speed] * (regs_needed - available_regs);
5953 /* If even the number of candidates exceeds the available registers, we
5954 penalize the extra candidate registers using target_spill_cost * 2,
5955 because it is more expensive to spill an induction variable than an invariant. */
5956 else
5957 cost = target_reg_cost [speed] * available_regs
5958 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5959 + target_spill_cost [speed] * (regs_needed - n_cands);
5961 /* Finally, add the number of candidates, so that we prefer eliminating
5962 induction variables if possible. */
5963 return cost + n_cands;
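/* A worked example with made-up target parameters: say 10 registers remain
   available, target_res_regs == 3, target_reg_cost == 2,
   target_spill_cost == 8 and n_old == 2 registers are already in use.
   For n_invs == 2 and n_cands == 2, regs_needed == 6 and 6 + 3 < 10, so
   the result is simply n_new + n_cands == 4 + 2 == 6.  For n_invs == 5 and
   n_cands == 4, regs_needed == 11 exceeds the 10 available registers but
   n_cands does not, so the result is 2 * 10 + 8 * (11 - 10) + 4 == 32.  */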
5966 /* For each size of the induction variable set determine the penalty. */
5968 static void
5969 determine_set_costs (struct ivopts_data *data)
5971 unsigned j, n;
5972 gphi *phi;
5973 gphi_iterator psi;
5974 tree op;
5975 class loop *loop = data->current_loop;
5976 bitmap_iterator bi;
5978 if (dump_file && (dump_flags & TDF_DETAILS))
5980 fprintf (dump_file, "<Global Costs>:\n");
5981 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5982 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5983 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5984 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5987 n = 0;
5988 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5990 phi = psi.phi ();
5991 op = PHI_RESULT (phi);
5993 if (virtual_operand_p (op))
5994 continue;
5996 if (get_iv (data, op))
5997 continue;
5999 if (!POINTER_TYPE_P (TREE_TYPE (op))
6000 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6001 continue;
6003 n++;
6006 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6008 struct version_info *info = ver_info (data, j);
6010 if (info->inv_id && info->has_nonlin_use)
6011 n++;
6014 data->regs_used = n;
6015 if (dump_file && (dump_flags & TDF_DETAILS))
6016 fprintf (dump_file, " regs_used %d\n", n);
6018 if (dump_file && (dump_flags & TDF_DETAILS))
6020 fprintf (dump_file, " cost for size:\n");
6021 fprintf (dump_file, " ivs\tcost\n");
6022 for (j = 0; j <= 2 * target_avail_regs; j++)
6023 fprintf (dump_file, " %d\t%d\n", j,
6024 ivopts_estimate_reg_pressure (data, 0, j));
6025 fprintf (dump_file, "\n");
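/* For instance (a hypothetical loop, for illustration only): a header with
   three integer or pointer PHIs of which one is already recognized as an
   iv, plus two invariants used in nonlinear expressions, yields
   regs_used == 2 + 2 == 4 -- a deliberately rough estimate of how many
   registers the loop already needs for values other than the ivs we are
   about to select.  */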
6029 /* Returns true if A is a cheaper cost pair than B. */
6031 static bool
6032 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6034 if (!a)
6035 return false;
6037 if (!b)
6038 return true;
6040 if (a->cost < b->cost)
6041 return true;
6043 if (b->cost < a->cost)
6044 return false;
6046 /* In case the costs are the same, prefer the cheaper candidate. */
6047 if (a->cand->cost < b->cand->cost)
6048 return true;
6050 return false;
6053 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6054 for more expensive, equal and cheaper respectively. */
6056 static int
6057 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6059 if (cheaper_cost_pair (a, b))
6060 return -1;
6061 if (cheaper_cost_pair (b, a))
6062 return 1;
6064 return 0;
6067 /* Returns the cost pair by which GROUP is expressed in IVS. */
6069 static class cost_pair *
6070 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6072 return ivs->cand_for_group[group->id];
6075 /* Computes the cost field of IVS structure. */
6077 static void
6078 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6080 comp_cost cost = ivs->cand_use_cost;
6082 cost += ivs->cand_cost;
6083 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6084 ivs->cost = cost;
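/* For example, cand_use_cost == 12 (complexity 1) plus cand_cost == 9 plus
   an estimated register pressure of 6 gives a set cost of 27 (complexity 1);
   the numbers here are arbitrary and only illustrate how the three
   components combine.  */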
6087 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6088 and IVS. */
6090 static void
6091 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6093 bitmap_iterator bi;
6094 unsigned iid;
6096 if (!invs)
6097 return;
6099 gcc_assert (n_inv_uses != NULL);
6100 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6102 n_inv_uses[iid]--;
6103 if (n_inv_uses[iid] == 0)
6104 ivs->n_invs--;
6108 /* Set GROUP not to be expressed by any candidate in IVS. */
6110 static void
6111 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6112 struct iv_group *group)
6114 unsigned gid = group->id, cid;
6115 class cost_pair *cp;
6117 cp = ivs->cand_for_group[gid];
6118 if (!cp)
6119 return;
6120 cid = cp->cand->id;
6122 ivs->bad_groups++;
6123 ivs->cand_for_group[gid] = NULL;
6124 ivs->n_cand_uses[cid]--;
6126 if (ivs->n_cand_uses[cid] == 0)
6128 bitmap_clear_bit (ivs->cands, cid);
6129 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6130 ivs->n_cands--;
6131 ivs->cand_cost -= cp->cand->cost;
6132 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6133 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6136 ivs->cand_use_cost -= cp->cost;
6137 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6138 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6139 iv_ca_recount_cost (data, ivs);
6142 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6143 IVS. */
6145 static void
6146 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6148 bitmap_iterator bi;
6149 unsigned iid;
6151 if (!invs)
6152 return;
6154 gcc_assert (n_inv_uses != NULL);
6155 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6157 n_inv_uses[iid]++;
6158 if (n_inv_uses[iid] == 1)
6159 ivs->n_invs++;
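/* Together with iv_ca_set_remove_invs above, this implements simple
   reference counting: e.g. if two selected cost pairs both depend on
   invariant expression 7, n_inv_uses[7] goes 0 -> 1 -> 2 and n_invs is
   bumped only on the first transition; dropping one of the pairs later
   brings the counter back to 1 without touching n_invs, so the
   register-pressure estimate counts the invariant once for as long as any
   use of it remains.  */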
6163 /* Set cost pair for GROUP in set IVS to CP. */
6165 static void
6166 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6167 struct iv_group *group, class cost_pair *cp)
6169 unsigned gid = group->id, cid;
6171 if (ivs->cand_for_group[gid] == cp)
6172 return;
6174 if (ivs->cand_for_group[gid])
6175 iv_ca_set_no_cp (data, ivs, group);
6177 if (cp)
6179 cid = cp->cand->id;
6181 ivs->bad_groups--;
6182 ivs->cand_for_group[gid] = cp;
6183 ivs->n_cand_uses[cid]++;
6184 if (ivs->n_cand_uses[cid] == 1)
6186 bitmap_set_bit (ivs->cands, cid);
6187 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6188 ivs->n_cands++;
6189 ivs->cand_cost += cp->cand->cost;
6190 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6191 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6194 ivs->cand_use_cost += cp->cost;
6195 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6196 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6197 iv_ca_recount_cost (data, ivs);
6201 /* Extend set IVS by expressing GROUP by some of the candidates in it
6202 if possible. Consider all important candidates if the candidates in
6203 set IVS don't give any result. */
6205 static void
6206 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6207 struct iv_group *group)
6209 class cost_pair *best_cp = NULL, *cp;
6210 bitmap_iterator bi;
6211 unsigned i;
6212 struct iv_cand *cand;
6214 gcc_assert (ivs->upto >= group->id);
6215 ivs->upto++;
6216 ivs->bad_groups++;
6218 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6220 cand = data->vcands[i];
6221 cp = get_group_iv_cost (data, group, cand);
6222 if (cheaper_cost_pair (cp, best_cp))
6223 best_cp = cp;
6226 if (best_cp == NULL)
6228 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6230 cand = data->vcands[i];
6231 cp = get_group_iv_cost (data, group, cand);
6232 if (cheaper_cost_pair (cp, best_cp))
6233 best_cp = cp;
6237 iv_ca_set_cp (data, ivs, group, best_cp);
6240 /* Get cost for assignment IVS. */
6242 static comp_cost
6243 iv_ca_cost (class iv_ca *ivs)
6245 /* This was a conditional expression but it triggered a bug in
6246 Sun C 5.5. */
6247 if (ivs->bad_groups)
6248 return infinite_cost;
6249 else
6250 return ivs->cost;
6253 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6254 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6255 respectively. */
6257 static int
6258 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6259 struct iv_group *group, class cost_pair *old_cp,
6260 class cost_pair *new_cp)
6262 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6263 unsigned old_n_invs = ivs->n_invs;
6264 iv_ca_set_cp (data, ivs, group, new_cp);
6265 unsigned new_n_invs = ivs->n_invs;
6266 iv_ca_set_cp (data, ivs, group, old_cp);
6268 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6271 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6272 it before NEXT. */
6274 static struct iv_ca_delta *
6275 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6276 class cost_pair *new_cp, struct iv_ca_delta *next)
6278 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6280 change->group = group;
6281 change->old_cp = old_cp;
6282 change->new_cp = new_cp;
6283 change->next = next;
6285 return change;
6288 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6289 are rewritten. */
6291 static struct iv_ca_delta *
6292 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6294 struct iv_ca_delta *last;
6296 if (!l2)
6297 return l1;
6299 if (!l1)
6300 return l2;
6302 for (last = l1; last->next; last = last->next)
6303 continue;
6304 last->next = l2;
6306 return l1;
6309 /* Reverse the list of changes DELTA, forming the inverse to it. */
6311 static struct iv_ca_delta *
6312 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6314 struct iv_ca_delta *act, *next, *prev = NULL;
6316 for (act = delta; act; act = next)
6318 next = act->next;
6319 act->next = prev;
6320 prev = act;
6322 std::swap (act->old_cp, act->new_cp);
6325 return prev;
6328 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6329 reverted instead. */
6331 static void
6332 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6333 struct iv_ca_delta *delta, bool forward)
6335 class cost_pair *from, *to;
6336 struct iv_ca_delta *act;
6338 if (!forward)
6339 delta = iv_ca_delta_reverse (delta);
6341 for (act = delta; act; act = act->next)
6343 from = act->old_cp;
6344 to = act->new_cp;
6345 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6346 iv_ca_set_cp (data, ivs, act->group, to);
6349 if (!forward)
6350 iv_ca_delta_reverse (delta);
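/* Deltas therefore act as tentative, reversible edits of an assignment.
   The typical pattern (used by iv_ca_extend, iv_ca_narrow and iv_ca_prune
   below) is roughly:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   i.e. apply the change, measure the resulting cost, undo it, and only
   commit the best delta for real once the search has finished.  */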
6353 /* Returns true if CAND is used in IVS. */
6355 static bool
6356 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6358 return ivs->n_cand_uses[cand->id] > 0;
6361 /* Returns number of induction variable candidates in the set IVS. */
6363 static unsigned
6364 iv_ca_n_cands (class iv_ca *ivs)
6366 return ivs->n_cands;
6369 /* Free the list of changes DELTA. */
6371 static void
6372 iv_ca_delta_free (struct iv_ca_delta **delta)
6374 struct iv_ca_delta *act, *next;
6376 for (act = *delta; act; act = next)
6378 next = act->next;
6379 free (act);
6382 *delta = NULL;
6385 /* Allocates new iv candidates assignment. */
6387 static class iv_ca *
6388 iv_ca_new (struct ivopts_data *data)
6390 class iv_ca *nw = XNEW (class iv_ca);
6392 nw->upto = 0;
6393 nw->bad_groups = 0;
6394 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6395 data->vgroups.length ());
6396 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6397 nw->cands = BITMAP_ALLOC (NULL);
6398 nw->n_cands = 0;
6399 nw->n_invs = 0;
6400 nw->cand_use_cost = no_cost;
6401 nw->cand_cost = 0;
6402 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6403 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6404 nw->cost = no_cost;
6406 return nw;
6409 /* Free memory occupied by the set IVS. */
6411 static void
6412 iv_ca_free (class iv_ca **ivs)
6414 free ((*ivs)->cand_for_group);
6415 free ((*ivs)->n_cand_uses);
6416 BITMAP_FREE ((*ivs)->cands);
6417 free ((*ivs)->n_inv_var_uses);
6418 free ((*ivs)->n_inv_expr_uses);
6419 free (*ivs);
6420 *ivs = NULL;
6423 /* Dumps IVS to FILE. */
6425 static void
6426 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6428 unsigned i;
6429 comp_cost cost = iv_ca_cost (ivs);
6431 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6432 cost.complexity);
6433 fprintf (file, " reg_cost: %d\n",
6434 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6435 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6436 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6437 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6438 bitmap_print (file, ivs->cands, " candidates: ","\n");
6440 for (i = 0; i < ivs->upto; i++)
6442 struct iv_group *group = data->vgroups[i];
6443 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6444 if (cp)
6445 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6446 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6447 cp->cost.cost, cp->cost.complexity);
6448 else
6449 fprintf (file, " group:%d --> ??\n", group->id);
6452 const char *pref = "";
6453 fprintf (file, " invariant variables: ");
6454 for (i = 1; i <= data->max_inv_var_id; i++)
6455 if (ivs->n_inv_var_uses[i])
6457 fprintf (file, "%s%d", pref, i);
6458 pref = ", ";
6461 pref = "";
6462 fprintf (file, "\n invariant expressions: ");
6463 for (i = 1; i <= data->max_inv_expr_id; i++)
6464 if (ivs->n_inv_expr_uses[i])
6466 fprintf (file, "%s%d", pref, i);
6467 pref = ", ";
6470 fprintf (file, "\n\n");
6473 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6474 new set, and store differences in DELTA. Number of induction variables
6475 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
6476 the function will try to find a solution with a minimal number of iv candidates. */
6478 static comp_cost
6479 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6480 struct iv_cand *cand, struct iv_ca_delta **delta,
6481 unsigned *n_ivs, bool min_ncand)
6483 unsigned i;
6484 comp_cost cost;
6485 struct iv_group *group;
6486 class cost_pair *old_cp, *new_cp;
6488 *delta = NULL;
6489 for (i = 0; i < ivs->upto; i++)
6491 group = data->vgroups[i];
6492 old_cp = iv_ca_cand_for_group (ivs, group);
6494 if (old_cp
6495 && old_cp->cand == cand)
6496 continue;
6498 new_cp = get_group_iv_cost (data, group, cand);
6499 if (!new_cp)
6500 continue;
6502 if (!min_ncand)
6504 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6505 /* Skip if new_cp depends on more invariants. */
6506 if (cmp_invs > 0)
6507 continue;
6509 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6510 /* Skip if new_cp is not cheaper. */
6511 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6512 continue;
6515 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6518 iv_ca_delta_commit (data, ivs, *delta, true);
6519 cost = iv_ca_cost (ivs);
6520 if (n_ivs)
6521 *n_ivs = iv_ca_n_cands (ivs);
6522 iv_ca_delta_commit (data, ivs, *delta, false);
6524 return cost;
6527 /* Try narrowing set IVS by removing CAND. Return the cost of
6528 the new set and store the differences in DELTA. START is
6529 the candidate with which we start narrowing. */
6531 static comp_cost
6532 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6533 struct iv_cand *cand, struct iv_cand *start,
6534 struct iv_ca_delta **delta)
6536 unsigned i, ci;
6537 struct iv_group *group;
6538 class cost_pair *old_cp, *new_cp, *cp;
6539 bitmap_iterator bi;
6540 struct iv_cand *cnd;
6541 comp_cost cost, best_cost, acost;
6543 *delta = NULL;
6544 for (i = 0; i < data->vgroups.length (); i++)
6546 group = data->vgroups[i];
6548 old_cp = iv_ca_cand_for_group (ivs, group);
6549 if (old_cp->cand != cand)
6550 continue;
6552 best_cost = iv_ca_cost (ivs);
6553 /* Start narrowing with START. */
6554 new_cp = get_group_iv_cost (data, group, start);
6556 if (data->consider_all_candidates)
6558 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6560 if (ci == cand->id || (start && ci == start->id))
6561 continue;
6563 cnd = data->vcands[ci];
6565 cp = get_group_iv_cost (data, group, cnd);
6566 if (!cp)
6567 continue;
6569 iv_ca_set_cp (data, ivs, group, cp);
6570 acost = iv_ca_cost (ivs);
6572 if (acost < best_cost)
6574 best_cost = acost;
6575 new_cp = cp;
6579 else
6581 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6583 if (ci == cand->id || (start && ci == start->id))
6584 continue;
6586 cnd = data->vcands[ci];
6588 cp = get_group_iv_cost (data, group, cnd);
6589 if (!cp)
6590 continue;
6592 iv_ca_set_cp (data, ivs, group, cp);
6593 acost = iv_ca_cost (ivs);
6595 if (acost < best_cost)
6597 best_cost = acost;
6598 new_cp = cp;
6602 /* Restore to old cp for use. */
6603 iv_ca_set_cp (data, ivs, group, old_cp);
6605 if (!new_cp)
6607 iv_ca_delta_free (delta);
6608 return infinite_cost;
6611 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6614 iv_ca_delta_commit (data, ivs, *delta, true);
6615 cost = iv_ca_cost (ivs);
6616 iv_ca_delta_commit (data, ivs, *delta, false);
6618 return cost;
6621 /* Try optimizing the set of candidates IVS by removing candidates other
6622 than EXCEPT_CAND from it. Return the cost of the new set, and store
6623 differences in DELTA. */
6625 static comp_cost
6626 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6627 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6629 bitmap_iterator bi;
6630 struct iv_ca_delta *act_delta, *best_delta;
6631 unsigned i;
6632 comp_cost best_cost, acost;
6633 struct iv_cand *cand;
6635 best_delta = NULL;
6636 best_cost = iv_ca_cost (ivs);
6638 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6640 cand = data->vcands[i];
6642 if (cand == except_cand)
6643 continue;
6645 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6647 if (acost < best_cost)
6649 best_cost = acost;
6650 iv_ca_delta_free (&best_delta);
6651 best_delta = act_delta;
6653 else
6654 iv_ca_delta_free (&act_delta);
6657 if (!best_delta)
6659 *delta = NULL;
6660 return best_cost;
6663 /* Recurse to possibly remove other unnecessary ivs. */
6664 iv_ca_delta_commit (data, ivs, best_delta, true);
6665 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6666 iv_ca_delta_commit (data, ivs, best_delta, false);
6667 *delta = iv_ca_delta_join (best_delta, *delta);
6668 return best_cost;
6671 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6672 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6673 the corresponding cost_pair; otherwise just return BEST_CP. */
6675 static class cost_pair*
6676 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6677 unsigned int cand_idx, struct iv_cand *old_cand,
6678 class cost_pair *best_cp)
6680 struct iv_cand *cand;
6681 class cost_pair *cp;
6683 gcc_assert (old_cand != NULL && best_cp != NULL);
6684 if (cand_idx == old_cand->id)
6685 return best_cp;
6687 cand = data->vcands[cand_idx];
6688 cp = get_group_iv_cost (data, group, cand);
6689 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6690 return cp;
6692 return best_cp;
6695 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6696 which are used by more than one iv use. For each of those candidates,
6697 this function tries to represent iv uses under that candidate using
6698 other ones with lower local cost, then tries to prune the new set.
6699 If the new set has lower cost, it returns the new cost after recording
6700 candidate replacement in list DELTA. */
6702 static comp_cost
6703 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6704 struct iv_ca_delta **delta)
6706 bitmap_iterator bi, bj;
6707 unsigned int i, j, k;
6708 struct iv_cand *cand;
6709 comp_cost orig_cost, acost;
6710 struct iv_ca_delta *act_delta, *tmp_delta;
6711 class cost_pair *old_cp, *best_cp = NULL;
6713 *delta = NULL;
6714 orig_cost = iv_ca_cost (ivs);
6716 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6718 if (ivs->n_cand_uses[i] == 1
6719 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6720 continue;
6722 cand = data->vcands[i];
6724 act_delta = NULL;
6725 /* Represent uses under current candidate using other ones with
6726 lower local cost. */
6727 for (j = 0; j < ivs->upto; j++)
6729 struct iv_group *group = data->vgroups[j];
6730 old_cp = iv_ca_cand_for_group (ivs, group);
6732 if (old_cp->cand != cand)
6733 continue;
6735 best_cp = old_cp;
6736 if (data->consider_all_candidates)
6737 for (k = 0; k < data->vcands.length (); k++)
6738 best_cp = cheaper_cost_with_cand (data, group, k,
6739 old_cp->cand, best_cp);
6740 else
6741 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6742 best_cp = cheaper_cost_with_cand (data, group, k,
6743 old_cp->cand, best_cp);
6745 if (best_cp == old_cp)
6746 continue;
6748 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6750 /* No need for further pruning. */
6751 if (!act_delta)
6752 continue;
6754 /* Prune the new candidate set. */
6755 iv_ca_delta_commit (data, ivs, act_delta, true);
6756 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6757 iv_ca_delta_commit (data, ivs, act_delta, false);
6758 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6760 if (acost < orig_cost)
6762 *delta = act_delta;
6763 return acost;
6765 else
6766 iv_ca_delta_free (&act_delta);
6769 return orig_cost;
6772 /* Tries to extend the set IVS in the best possible way in order to
6773 express GROUP. If ORIGINALP is true, prefer candidates from
6774 the original set of IVs, otherwise favor important candidates not
6775 based on any memory object. */
6777 static bool
6778 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6779 struct iv_group *group, bool originalp)
6781 comp_cost best_cost, act_cost;
6782 unsigned i;
6783 bitmap_iterator bi;
6784 struct iv_cand *cand;
6785 struct iv_ca_delta *best_delta = NULL, *act_delta;
6786 class cost_pair *cp;
6788 iv_ca_add_group (data, ivs, group);
6789 best_cost = iv_ca_cost (ivs);
6790 cp = iv_ca_cand_for_group (ivs, group);
6791 if (cp)
6793 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6794 iv_ca_set_no_cp (data, ivs, group);
6797 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6798 first try important candidates not based on any memory object. Only if
6799 this fails, try the specific ones. Rationale -- in loops with many
6800 variables the best choice often is to use just one generic biv. If we
6801 added here many ivs specific to the uses, the optimization algorithm later
6802 would be likely to get stuck in a local minimum, thus causing us to create
6803 too many ivs. The approach from few ivs to more seems more likely to be
6804 successful -- starting from few ivs, replacing an expensive use by a
6805 specific iv should always be a win. */
6806 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6808 cand = data->vcands[i];
6810 if (originalp && cand->pos != IP_ORIGINAL
6811 continue;
6813 if (!originalp && cand->iv->base_object != NULL_TREE)
6814 continue;
6816 if (iv_ca_cand_used_p (ivs, cand))
6817 continue;
6819 cp = get_group_iv_cost (data, group, cand);
6820 if (!cp)
6821 continue;
6823 iv_ca_set_cp (data, ivs, group, cp);
6824 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6825 true);
6826 iv_ca_set_no_cp (data, ivs, group);
6827 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6829 if (act_cost < best_cost)
6831 best_cost = act_cost;
6833 iv_ca_delta_free (&best_delta);
6834 best_delta = act_delta;
6836 else
6837 iv_ca_delta_free (&act_delta);
6840 if (best_cost.infinite_cost_p ())
6842 for (i = 0; i < group->n_map_members; i++)
6844 cp = group->cost_map + i;
6845 cand = cp->cand;
6846 if (!cand)
6847 continue;
6849 /* Already tried this. */
6850 if (cand->important)
6852 if (originalp && cand->pos == IP_ORIGINAL)
6853 continue;
6854 if (!originalp && cand->iv->base_object == NULL_TREE)
6855 continue;
6858 if (iv_ca_cand_used_p (ivs, cand))
6859 continue;
6861 act_delta = NULL;
6862 iv_ca_set_cp (data, ivs, group, cp);
6863 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6864 iv_ca_set_no_cp (data, ivs, group);
6865 act_delta = iv_ca_delta_add (group,
6866 iv_ca_cand_for_group (ivs, group),
6867 cp, act_delta);
6869 if (act_cost < best_cost)
6871 best_cost = act_cost;
6873 if (best_delta)
6874 iv_ca_delta_free (&best_delta);
6875 best_delta = act_delta;
6877 else
6878 iv_ca_delta_free (&act_delta);
6882 iv_ca_delta_commit (data, ivs, best_delta, true);
6883 iv_ca_delta_free (&best_delta);
6885 return !best_cost.infinite_cost_p ();
6888 /* Finds an initial assignment of candidates to uses. */
6890 static class iv_ca *
6891 get_initial_solution (struct ivopts_data *data, bool originalp)
6893 unsigned i;
6894 class iv_ca *ivs = iv_ca_new (data);
6896 for (i = 0; i < data->vgroups.length (); i++)
6897 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6899 iv_ca_free (&ivs);
6900 return NULL;
6903 return ivs;
6906 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6907 points to a bool variable; if it is true, this function tries to break
6908 a locally optimal fixed point by replacing candidates in IVS. */
6910 static bool
6911 try_improve_iv_set (struct ivopts_data *data,
6912 class iv_ca *ivs, bool *try_replace_p)
6914 unsigned i, n_ivs;
6915 comp_cost acost, best_cost = iv_ca_cost (ivs);
6916 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6917 struct iv_cand *cand;
6919 /* Try extending the set of induction variables by one. */
6920 for (i = 0; i < data->vcands.length (); i++)
6922 cand = data->vcands[i];
6924 if (iv_ca_cand_used_p (ivs, cand))
6925 continue;
6927 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6928 if (!act_delta)
6929 continue;
6931 /* If we successfully added the candidate and the set is small enough,
6932 try optimizing it by removing other candidates. */
6933 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6935 iv_ca_delta_commit (data, ivs, act_delta, true);
6936 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6937 iv_ca_delta_commit (data, ivs, act_delta, false);
6938 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6941 if (acost < best_cost)
6943 best_cost = acost;
6944 iv_ca_delta_free (&best_delta);
6945 best_delta = act_delta;
6947 else
6948 iv_ca_delta_free (&act_delta);
6951 if (!best_delta)
6953 /* Try removing the candidates from the set instead. */
6954 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6956 if (!best_delta && *try_replace_p)
6958 *try_replace_p = false;
6959 /* So far the candidate-selection algorithm tends to choose fewer IVs,
6960 which lets it handle cases in which loops have many variables
6961 but the best choice is often to use only one general biv. One
6962 weakness is that it can't handle the opposite cases, in which different
6963 candidates should be chosen with respect to each use. To solve
6964 the problem, we replace candidates in the manner described by the
6965 comments of iv_ca_replace, thus giving the general algorithm a chance
6966 to break the locally optimal fixed point in these cases. */
6967 best_cost = iv_ca_replace (data, ivs, &best_delta);
6970 if (!best_delta)
6971 return false;
6974 iv_ca_delta_commit (data, ivs, best_delta, true);
6975 iv_ca_delta_free (&best_delta);
6976 return best_cost == iv_ca_cost (ivs);
6979 /* Attempts to find the optimal set of induction variables. We use a simple
6980 greedy heuristic -- we try to replace at most one candidate in the selected
6981 solution and remove the unused ivs while this improves the cost. */
6983 static class iv_ca *
6984 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6986 class iv_ca *set;
6987 bool try_replace_p = true;
6989 /* Get the initial solution. */
6990 set = get_initial_solution (data, originalp);
6991 if (!set)
6993 if (dump_file && (dump_flags & TDF_DETAILS))
6994 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6995 return NULL;
6998 if (dump_file && (dump_flags & TDF_DETAILS))
7000 fprintf (dump_file, "Initial set of candidates:\n");
7001 iv_ca_dump (data, dump_file, set);
7004 while (try_improve_iv_set (data, set, &try_replace_p))
7006 if (dump_file && (dump_flags & TDF_DETAILS))
7008 fprintf (dump_file, "Improved to:\n");
7009 iv_ca_dump (data, dump_file, set);
7013 /* If the set has infinite_cost, it can't be optimal. */
7014 if (iv_ca_cost (set).infinite_cost_p ())
7016 if (dump_file && (dump_flags & TDF_DETAILS))
7017 fprintf (dump_file,
7018 "Overflow to infinite cost in try_improve_iv_set.\n");
7019 iv_ca_free (&set);
7021 return set;
7024 static class iv_ca *
7025 find_optimal_iv_set (struct ivopts_data *data)
7027 unsigned i;
7028 comp_cost cost, origcost;
7029 class iv_ca *set, *origset;
7031 /* Determine the cost based on a strategy that starts with original IVs,
7032 and try again using a strategy that prefers candidates not based
7033 on any IVs. */
7034 origset = find_optimal_iv_set_1 (data, true);
7035 set = find_optimal_iv_set_1 (data, false);
7037 if (!origset && !set)
7038 return NULL;
7040 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7041 cost = set ? iv_ca_cost (set) : infinite_cost;
7043 if (dump_file && (dump_flags & TDF_DETAILS))
7045 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7046 origcost.cost, origcost.complexity);
7047 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7048 cost.cost, cost.complexity);
7051 /* Choose the one with the best cost. */
7052 if (origcost <= cost)
7054 if (set)
7055 iv_ca_free (&set);
7056 set = origset;
7058 else if (origset)
7059 iv_ca_free (&origset);
7061 for (i = 0; i < data->vgroups.length (); i++)
7063 struct iv_group *group = data->vgroups[i];
7064 group->selected = iv_ca_cand_for_group (set, group)->cand;
7067 return set;
7070 /* Creates a new induction variable corresponding to CAND. */
7072 static void
7073 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7075 gimple_stmt_iterator incr_pos;
7076 tree base;
7077 struct iv_use *use;
7078 struct iv_group *group;
7079 bool after = false;
7081 gcc_assert (cand->iv != NULL);
7083 switch (cand->pos)
7085 case IP_NORMAL:
7086 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7087 break;
7089 case IP_END:
7090 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7091 after = true;
7092 break;
7094 case IP_AFTER_USE:
7095 after = true;
7096 /* fall through */
7097 case IP_BEFORE_USE:
7098 incr_pos = gsi_for_stmt (cand->incremented_at);
7099 break;
7101 case IP_ORIGINAL:
7102 /* Mark that the iv is preserved. */
7103 name_info (data, cand->var_before)->preserve_biv = true;
7104 name_info (data, cand->var_after)->preserve_biv = true;
7106 /* Rewrite the increment so that it uses var_before directly. */
7107 use = find_interesting_uses_op (data, cand->var_after);
7108 group = data->vgroups[use->group_id];
7109 group->selected = cand;
7110 return;
7113 gimple_add_tmp_var (cand->var_before);
7115 base = unshare_expr (cand->iv->base);
7117 create_iv (base, unshare_expr (cand->iv->step),
7118 cand->var_before, data->current_loop,
7119 &incr_pos, after, &cand->var_before, &cand->var_after);
7122 /* Creates new induction variables described in SET. */
7124 static void
7125 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7127 unsigned i;
7128 struct iv_cand *cand;
7129 bitmap_iterator bi;
7131 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7133 cand = data->vcands[i];
7134 create_new_iv (data, cand);
7137 if (dump_file && (dump_flags & TDF_DETAILS))
7139 fprintf (dump_file, "Selected IV set for loop %d",
7140 data->current_loop->num);
7141 if (data->loop_loc != UNKNOWN_LOCATION)
7142 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7143 LOCATION_LINE (data->loop_loc));
7144 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7145 avg_loop_niter (data->current_loop));
7146 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7147 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7149 cand = data->vcands[i];
7150 dump_cand (dump_file, cand);
7152 fprintf (dump_file, "\n");
7156 /* Rewrites USE (definition of iv used in a nonlinear expression)
7157 using candidate CAND. */
7159 static void
7160 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7161 struct iv_use *use, struct iv_cand *cand)
7163 gassign *ass;
7164 gimple_stmt_iterator bsi;
7165 tree comp, type = get_use_type (use), tgt;
7167 /* An important special case -- if we are asked to express value of
7168 the original iv by itself, just exit; there is no need to
7169 introduce a new computation (that might also need casting the
7170 variable to unsigned and back). */
7171 if (cand->pos == IP_ORIGINAL
7172 && cand->incremented_at == use->stmt)
7174 tree op = NULL_TREE;
7175 enum tree_code stmt_code;
7177 gcc_assert (is_gimple_assign (use->stmt));
7178 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7180 /* Check whether we may leave the computation unchanged.
7181 This is the case only if it does not rely on other
7182 computations in the loop -- otherwise, the computation
7183 we rely upon may be removed in remove_unused_ivs,
7184 thus leading to ICE. */
7185 stmt_code = gimple_assign_rhs_code (use->stmt);
7186 if (stmt_code == PLUS_EXPR
7187 || stmt_code == MINUS_EXPR
7188 || stmt_code == POINTER_PLUS_EXPR)
7190 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7191 op = gimple_assign_rhs2 (use->stmt);
7192 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7193 op = gimple_assign_rhs1 (use->stmt);
7196 if (op != NULL_TREE)
7198 if (expr_invariant_in_loop_p (data->current_loop, op))
7199 return;
7200 if (TREE_CODE (op) == SSA_NAME)
7202 struct iv *iv = get_iv (data, op);
7203 if (iv != NULL && integer_zerop (iv->step))
7204 return;
7209 switch (gimple_code (use->stmt))
7211 case GIMPLE_PHI:
7212 tgt = PHI_RESULT (use->stmt);
7214 /* If we should keep the biv, do not replace it. */
7215 if (name_info (data, tgt)->preserve_biv)
7216 return;
7218 bsi = gsi_after_labels (gimple_bb (use->stmt));
7219 break;
7221 case GIMPLE_ASSIGN:
7222 tgt = gimple_assign_lhs (use->stmt);
7223 bsi = gsi_for_stmt (use->stmt);
7224 break;
7226 default:
7227 gcc_unreachable ();
7230 aff_tree aff_inv, aff_var;
7231 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7232 use, cand, &aff_inv, &aff_var))
7233 gcc_unreachable ();
7235 unshare_aff_combination (&aff_inv);
7236 unshare_aff_combination (&aff_var);
7237 /* Prefer a CSE opportunity over a loop invariant by adding the offset last,
7238 so that iv_uses with different offsets can be CSEed. */
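/* For instance, two uses of p + i + 4 and p + i + 8 (p and i being
   placeholder names for this illustration) then both compute p + i, which
   can be CSEd, while the constant offsets 4 and 8 are added -- or folded
   into the addressing mode -- afterwards.  */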
7239 poly_widest_int offset = aff_inv.offset;
7240 aff_inv.offset = 0;
7242 gimple_seq stmt_list = NULL, seq = NULL;
7243 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7244 tree comp_op2 = aff_combination_to_tree (&aff_var);
7245 gcc_assert (comp_op1 && comp_op2);
7247 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7248 gimple_seq_add_seq (&stmt_list, seq);
7249 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7250 gimple_seq_add_seq (&stmt_list, seq);
7252 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7253 std::swap (comp_op1, comp_op2);
7255 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7257 comp = fold_build_pointer_plus (comp_op1,
7258 fold_convert (sizetype, comp_op2));
7259 comp = fold_build_pointer_plus (comp,
7260 wide_int_to_tree (sizetype, offset));
7262 else
7264 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7265 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7266 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7267 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7270 comp = fold_convert (type, comp);
7271 if (!valid_gimple_rhs_p (comp)
7272 || (gimple_code (use->stmt) != GIMPLE_PHI
7273 /* We can't allow re-allocating the stmt as it might be pointed
7274 to still. */
7275 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7276 >= gimple_num_ops (gsi_stmt (bsi)))))
7278 comp = force_gimple_operand (comp, &seq, true, NULL);
7279 gimple_seq_add_seq (&stmt_list, seq);
7280 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7282 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7283 /* As this isn't a plain copy we have to reset alignment
7284 information. */
7285 if (SSA_NAME_PTR_INFO (comp))
7286 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7290 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7291 if (gimple_code (use->stmt) == GIMPLE_PHI)
7293 ass = gimple_build_assign (tgt, comp);
7294 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7296 bsi = gsi_for_stmt (use->stmt);
7297 remove_phi_node (&bsi, false);
7299 else
7301 gimple_assign_set_rhs_from_tree (&bsi, comp);
7302 use->stmt = gsi_stmt (bsi);
7306 /* Performs a peephole optimization to reorder the iv update statement with
7307 a mem ref to enable instruction combining in later phases. The mem ref uses
7308 the iv value before the update, so the reordering transformation requires
7309 adjustment of the offset. CAND is the selected IV_CAND.
7311 Example:
7313 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7314 iv2 = iv1 + 1;
7316 if (t < val) (1)
7317 goto L;
7318 goto Head;
7321 directly propagating t over to (1) would introduce an overlapping live range
7322 and thus increase register pressure. This peephole transforms it into:
7325 iv2 = iv1 + 1;
7326 t = MEM_REF (base, iv2, 8, 8);
7327 if (t < val)
7328 goto L;
7329 goto Head;
7332 static void
7333 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7335 tree var_after;
7336 gimple *iv_update, *stmt;
7337 basic_block bb;
7338 gimple_stmt_iterator gsi, gsi_iv;
7340 if (cand->pos != IP_NORMAL)
7341 return;
7343 var_after = cand->var_after;
7344 iv_update = SSA_NAME_DEF_STMT (var_after);
7346 bb = gimple_bb (iv_update);
7347 gsi = gsi_last_nondebug_bb (bb);
7348 stmt = gsi_stmt (gsi);
7350 /* Only handle conditional statement for now. */
7351 if (gimple_code (stmt) != GIMPLE_COND)
7352 return;
7354 gsi_prev_nondebug (&gsi);
7355 stmt = gsi_stmt (gsi);
7356 if (stmt != iv_update)
7357 return;
7359 gsi_prev_nondebug (&gsi);
7360 if (gsi_end_p (gsi))
7361 return;
7363 stmt = gsi_stmt (gsi);
7364 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7365 return;
7367 if (stmt != use->stmt)
7368 return;
7370 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7371 return;
7373 if (dump_file && (dump_flags & TDF_DETAILS))
7375 fprintf (dump_file, "Reordering \n");
7376 print_gimple_stmt (dump_file, iv_update, 0);
7377 print_gimple_stmt (dump_file, use->stmt, 0);
7378 fprintf (dump_file, "\n");
7381 gsi = gsi_for_stmt (use->stmt);
7382 gsi_iv = gsi_for_stmt (iv_update);
7383 gsi_move_before (&gsi_iv, &gsi);
7385 cand->pos = IP_BEFORE_USE;
7386 cand->incremented_at = use->stmt;
7389 /* Return the alias pointer type that should be used for a MEM_REF
7390 associated with USE, which has type USE_PTR_ADDRESS. */
7392 static tree
7393 get_alias_ptr_type_for_ptr_address (iv_use *use)
7395 gcall *call = as_a <gcall *> (use->stmt);
7396 switch (gimple_call_internal_fn (call))
7398 case IFN_MASK_LOAD:
7399 case IFN_MASK_STORE:
7400 case IFN_MASK_LOAD_LANES:
7401 case IFN_MASK_STORE_LANES:
7402 /* The second argument contains the correct alias type. */
7403 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7404 return TREE_TYPE (gimple_call_arg (call, 1));
7406 default:
7407 gcc_unreachable ();
7412 /* Rewrites USE (address that is an iv) using candidate CAND. */
7414 static void
7415 rewrite_use_address (struct ivopts_data *data,
7416 struct iv_use *use, struct iv_cand *cand)
7418 aff_tree aff;
7419 bool ok;
7421 adjust_iv_update_pos (cand, use);
7422 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7423 gcc_assert (ok);
7424 unshare_aff_combination (&aff);
7426 /* To avoid undefined overflow problems, all IV candidates use unsigned
7427 integer types. The drawback is that this makes it impossible for
7428 create_mem_ref to distinguish an IV that is based on a memory object
7429 from one that represents simply an offset.
7431 To work around this problem, we pass a hint to create_mem_ref that
7432 indicates which variable (if any) in aff is an IV based on a memory
7433 object. Note that we only consider the candidate. If this is not
7434 based on an object, the base of the reference is in some subexpression
7435 of the use -- but these will use pointer types, so they are recognized
7436 by the create_mem_ref heuristics anyway. */
7437 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7438 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7439 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7440 tree type = use->mem_type;
7441 tree alias_ptr_type;
7442 if (use->type == USE_PTR_ADDRESS)
7443 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7444 else
7446 gcc_assert (type == TREE_TYPE (*use->op_p));
7447 unsigned int align = get_object_alignment (*use->op_p);
7448 if (align != TYPE_ALIGN (type))
7449 type = build_aligned_type (type, align);
7450 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7452 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7453 iv, base_hint, data->speed);
7455 if (use->type == USE_PTR_ADDRESS)
7457 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7458 ref = fold_convert (get_use_type (use), ref);
7459 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7460 true, GSI_SAME_STMT);
7462 else
7463 copy_ref_info (ref, *use->op_p);
7465 *use->op_p = ref;
7468 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7469 candidate CAND. */
7471 static void
7472 rewrite_use_compare (struct ivopts_data *data,
7473 struct iv_use *use, struct iv_cand *cand)
7475 tree comp, op, bound;
7476 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7477 enum tree_code compare;
7478 struct iv_group *group = data->vgroups[use->group_id];
7479 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7481 bound = cp->value;
7482 if (bound)
7484 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7485 tree var_type = TREE_TYPE (var);
7486 gimple_seq stmts;
7488 if (dump_file && (dump_flags & TDF_DETAILS))
7490 fprintf (dump_file, "Replacing exit test: ");
7491 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7493 compare = cp->comp;
7494 bound = unshare_expr (fold_convert (var_type, bound));
7495 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7496 if (stmts)
7497 gsi_insert_seq_on_edge_immediate (
7498 loop_preheader_edge (data->current_loop),
7499 stmts);
7501 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7502 gimple_cond_set_lhs (cond_stmt, var);
7503 gimple_cond_set_code (cond_stmt, compare);
7504 gimple_cond_set_rhs (cond_stmt, op);
7505 return;
7508 /* The induction variable elimination failed; just express the original
7509 giv. */
7510 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7511 gcc_assert (comp != NULL_TREE);
7512 gcc_assert (use->op_p != NULL);
7513 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7514 SSA_NAME_VAR (*use->op_p),
7515 true, GSI_SAME_STMT);
7518 /* Rewrite the groups using the selected induction variables. */
7520 static void
7521 rewrite_groups (struct ivopts_data *data)
7523 unsigned i, j;
7525 for (i = 0; i < data->vgroups.length (); i++)
7527 struct iv_group *group = data->vgroups[i];
7528 struct iv_cand *cand = group->selected;
7530 gcc_assert (cand);
7532 if (group->type == USE_NONLINEAR_EXPR)
7534 for (j = 0; j < group->vuses.length (); j++)
7536 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7537 update_stmt (group->vuses[j]->stmt);
7540 else if (address_p (group->type))
7542 for (j = 0; j < group->vuses.length (); j++)
7544 rewrite_use_address (data, group->vuses[j], cand);
7545 update_stmt (group->vuses[j]->stmt);
7548 else
7550 gcc_assert (group->type == USE_COMPARE);
7552 for (j = 0; j < group->vuses.length (); j++)
7554 rewrite_use_compare (data, group->vuses[j], cand);
7555 update_stmt (group->vuses[j]->stmt);
7561 /* Removes the ivs that are not used after rewriting. */
7563 static void
7564 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7566 unsigned j;
7567 bitmap_iterator bi;
7569 /* Figure out an order in which to release SSA DEFs so that we don't
7570 release something that we'd have to propagate into a debug stmt
7571 afterwards. */
7572 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7574 struct version_info *info;
7576 info = ver_info (data, j);
7577 if (info->iv
7578 && !integer_zerop (info->iv->step)
7579 && !info->inv_id
7580 && !info->iv->nonlin_use
7581 && !info->preserve_biv)
7583 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7585 tree def = info->iv->ssa_name;
7587 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7589 imm_use_iterator imm_iter;
7590 use_operand_p use_p;
7591 gimple *stmt;
7592 int count = 0;
7594 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7596 if (!gimple_debug_bind_p (stmt))
7597 continue;
7599 /* We just want to determine whether to do nothing
7600 (count == 0), to substitute the computed
7601 expression into a single use of the SSA DEF by
7602 itself (count == 1), or to use a debug temp
7603 because the SSA DEF is used multiple times or as
7604 part of a larger expression (count > 1). */
7605 count++;
7606 if (gimple_debug_bind_get_value (stmt) != def)
7607 count++;
7609 if (count > 1)
7610 BREAK_FROM_IMM_USE_STMT (imm_iter);
7613 if (!count)
7614 continue;
7616 struct iv_use dummy_use;
7617 struct iv_cand *best_cand = NULL, *cand;
7618 unsigned i, best_pref = 0, cand_pref;
7619 tree comp = NULL_TREE;
7621 memset (&dummy_use, 0, sizeof (dummy_use));
7622 dummy_use.iv = info->iv;
7623 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7625 cand = data->vgroups[i]->selected;
7626 if (cand == best_cand)
7627 continue;
7628 cand_pref = operand_equal_p (cand->iv->step,
7629 info->iv->step, 0)
7630 ? 4 : 0;
7631 cand_pref
7632 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7633 == TYPE_MODE (TREE_TYPE (info->iv->base))
7634 ? 2 : 0;
7635 cand_pref
7636 += TREE_CODE (cand->iv->base) == INTEGER_CST
7637 ? 1 : 0;
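/* The weights make the criteria strictly ordered: a matching step (4)
   outranks a matching mode (2), which outranks a constant base (1), so
   e.g. a candidate that only matches the step (preference 4) still beats
   one that matches the mode and has a constant base (2 + 1 == 3).  */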
7638 if (best_cand == NULL || best_pref < cand_pref)
7640 tree this_comp
7641 = get_debug_computation_at (data->current_loop,
7642 SSA_NAME_DEF_STMT (def),
7643 &dummy_use, cand);
7644 if (this_comp)
7646 best_cand = cand;
7647 best_pref = cand_pref;
7648 comp = this_comp;
7653 if (!best_cand)
7654 continue;
7656 comp = unshare_expr (comp);
7657 if (count > 1)
7659 tree vexpr = make_node (DEBUG_EXPR_DECL);
7660 DECL_ARTIFICIAL (vexpr) = 1;
7661 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7662 if (SSA_NAME_VAR (def))
7663 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7664 else
7665 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7666 gdebug *def_temp
7667 = gimple_build_debug_bind (vexpr, comp, NULL);
7668 gimple_stmt_iterator gsi;
7670 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7671 gsi = gsi_after_labels (gimple_bb
7672 (SSA_NAME_DEF_STMT (def)));
7673 else
7674 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7676 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7677 comp = vexpr;
7680 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7682 if (!gimple_debug_bind_p (stmt))
7683 continue;
7685 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7686 SET_USE (use_p, comp);
7688 update_stmt (stmt);
7695 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7696 for hash_map::traverse. */
7698 bool
7699 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7701 free (value);
7702 return true;
7705 /* Frees data allocated by the optimization of a single loop. */
7707 static void
7708 free_loop_data (struct ivopts_data *data)
7710 unsigned i, j;
7711 bitmap_iterator bi;
7712 tree obj;
7714 if (data->niters)
7716 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7717 delete data->niters;
7718 data->niters = NULL;
7721 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7723 struct version_info *info;
7725 info = ver_info (data, i);
7726 info->iv = NULL;
7727 info->has_nonlin_use = false;
7728 info->preserve_biv = false;
7729 info->inv_id = 0;
7731 bitmap_clear (data->relevant);
7732 bitmap_clear (data->important_candidates);
7734 for (i = 0; i < data->vgroups.length (); i++)
7736 struct iv_group *group = data->vgroups[i];
7738 for (j = 0; j < group->vuses.length (); j++)
7739 free (group->vuses[j]);
7740 group->vuses.release ();
7742 BITMAP_FREE (group->related_cands);
7743 for (j = 0; j < group->n_map_members; j++)
7745 if (group->cost_map[j].inv_vars)
7746 BITMAP_FREE (group->cost_map[j].inv_vars);
7747 if (group->cost_map[j].inv_exprs)
7748 BITMAP_FREE (group->cost_map[j].inv_exprs);
7751 free (group->cost_map);
7752 free (group);
7754 data->vgroups.truncate (0);
7756 for (i = 0; i < data->vcands.length (); i++)
7758 struct iv_cand *cand = data->vcands[i];
7760 if (cand->inv_vars)
7761 BITMAP_FREE (cand->inv_vars);
7762 if (cand->inv_exprs)
7763 BITMAP_FREE (cand->inv_exprs);
7764 free (cand);
7766 data->vcands.truncate (0);
7768 if (data->version_info_size < num_ssa_names)
7770 data->version_info_size = 2 * num_ssa_names;
7771 free (data->version_info);
7772 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7775 data->max_inv_var_id = 0;
7776 data->max_inv_expr_id = 0;
7778 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7779 SET_DECL_RTL (obj, NULL_RTX);
7781 decl_rtl_to_reset.truncate (0);
7783 data->inv_expr_tab->empty ();
7785 data->iv_common_cand_tab->empty ();
7786 data->iv_common_cands.truncate (0);
7789 /* Finalizes data structures used by the iv optimization pass. */
7792 static void
7793 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7795 free_loop_data (data);
7796 free (data->version_info);
7797 BITMAP_FREE (data->relevant);
7798 BITMAP_FREE (data->important_candidates);
7800 decl_rtl_to_reset.release ();
7801 data->vgroups.release ();
7802 data->vcands.release ();
7803 delete data->inv_expr_tab;
7804 data->inv_expr_tab = NULL;
7805 free_affine_expand_cache (&data->name_expansion_cache);
7806 if (data->base_object_map)
7807 delete data->base_object_map;
7808 delete data->iv_common_cand_tab;
7809 data->iv_common_cand_tab = NULL;
7810 data->iv_common_cands.release ();
7811 obstack_free (&data->iv_obstack, NULL);
7814 /* Returns true if the loop body BODY includes any function calls. */
7816 static bool
7817 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7819 gimple_stmt_iterator gsi;
7820 unsigned i;
7822 for (i = 0; i < num_nodes; i++)
7823 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7825 gimple *stmt = gsi_stmt (gsi);
7826 if (is_gimple_call (stmt)
7827 && !gimple_call_internal_p (stmt)
7828 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7829 return true;
7831 return false;
7834 /* Determine cost scaling factor for basic blocks in loop. */
7835 #define COST_SCALING_FACTOR_BOUND (20)
7837 static void
7838 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7840 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7841 if (!data->speed || lfreq <= 0)
7842 return;
7844 int max_freq = lfreq;
7845 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7847 body[i]->aux = (void *)(intptr_t) 1;
7848 if (max_freq < body[i]->count.to_frequency (cfun))
7849 max_freq = body[i]->count.to_frequency (cfun);
7851 if (max_freq > lfreq)
7853 int divisor, factor;
7854 /* Check if scaling factor itself needs to be scaled by the bound. This
7855 is to avoid overflow when scaling cost according to profile info. */
7856 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7858 divisor = max_freq;
7859 factor = COST_SCALING_FACTOR_BOUND;
7861 else
7863 divisor = lfreq;
7864 factor = 1;
7866 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7868 int bfreq = body[i]->count.to_frequency (cfun);
7869 if (bfreq <= lfreq)
7870 continue;
7872 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
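/* Numeric illustration with invented profile counts: with a header
   frequency lfreq == 100 and hottest block max_freq == 300, the ratio 3 is
   within COST_SCALING_FACTOR_BOUND, so divisor == lfreq and factor == 1,
   and a block at frequency 250 gets aux == 250 / 100 == 2, i.e. its costs
   are later weighted twice.  If max_freq were 5000, the ratio 50 exceeds
   the bound, so divisor == max_freq and factor == 20, capping the hottest
   block at weight 20 (20 * 5000 / 5000) while a block at 2500 gets
   20 * 2500 / 5000 == 10.  */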
7877 /* Find the doloop comparison use and set its doloop_p flag if found. */
7879 static bool
7880 find_doloop_use (struct ivopts_data *data)
7882 struct loop *loop = data->current_loop;
7884 for (unsigned i = 0; i < data->vgroups.length (); i++)
7886 struct iv_group *group = data->vgroups[i];
7887 if (group->type == USE_COMPARE)
7889 gcc_assert (group->vuses.length () == 1);
7890 struct iv_use *use = group->vuses[0];
7891 gimple *stmt = use->stmt;
7892 if (gimple_code (stmt) == GIMPLE_COND)
7894 basic_block bb = gimple_bb (stmt);
7895 edge true_edge, false_edge;
7896 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
7897 /* This comparison controls the loop latch. Require the latch to be empty
7898 for now. */
7899 if ((loop->latch == true_edge->dest
7900 || loop->latch == false_edge->dest)
7901 && empty_block_p (loop->latch))
7903 group->doloop_p = true;
7904 if (dump_file && (dump_flags & TDF_DETAILS))
7906 fprintf (dump_file, "Doloop cmp iv use: ");
7907 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
7909 return true;
7915 return false;
7918 /* For targets which support doloop, predict whether the later RTL doloop
7919 transformation will be performed on this loop; if so, further detect the
7920 doloop use and set the flag doloop_use_p. */
7922 void
7923 analyze_and_mark_doloop_use (struct ivopts_data *data)
7925 data->doloop_use_p = false;
7927 if (!flag_branch_on_count_reg)
7928 return;
7930 if (!generic_predict_doloop_p (data))
7931 return;
7933 if (find_doloop_use (data))
7935 data->doloop_use_p = true;
7936 if (dump_file && (dump_flags & TDF_DETAILS))
7938 struct loop *loop = data->current_loop;
7939 fprintf (dump_file,
7940 "Predict loop %d can perform"
7941 " doloop optimization later.\n",
7942 loop->num);
7943 flow_loop_dump (loop, dump_file, NULL, 1);
7948 /* Optimizes the LOOP. Returns true if anything changed. */
7950 static bool
7951 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
7952 bitmap toremove)
7954 bool changed = false;
7955 class iv_ca *iv_ca;
7956 edge exit = single_dom_exit (loop);
7957 basic_block *body;
7959 gcc_assert (!data->niters);
7960 data->current_loop = loop;
7961 data->loop_loc = find_loop_location (loop).get_location_t ();
7962 data->speed = optimize_loop_for_speed_p (loop);
7964 if (dump_file && (dump_flags & TDF_DETAILS))
7966 fprintf (dump_file, "Processing loop %d", loop->num);
7967 if (data->loop_loc != UNKNOWN_LOCATION)
7968 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7969 LOCATION_LINE (data->loop_loc));
7970 fprintf (dump_file, "\n");
7972 if (exit)
7974 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7975 exit->src->index, exit->dest->index);
7976 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7977 fprintf (dump_file, "\n");
7980 fprintf (dump_file, "\n");
7983 body = get_loop_body (loop);
7984 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7985 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7987 data->loop_single_exit_p
7988 = exit != NULL && loop_only_exit_p (loop, body, exit);
7990 /* For each ssa name determines whether it behaves as an induction variable
7991 in some loop. */
7992 if (!find_induction_variables (data))
7993 goto finish;
7995 /* Finds interesting uses (item 1). */
7996 find_interesting_uses (data);
7997 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7998 goto finish;
8000 /* Determine cost scaling factor for basic blocks in loop. */
8001 determine_scaling_factor (data, body);
8003 /* Analyze doloop possibility and mark the doloop use if predicted. */
8004 analyze_and_mark_doloop_use (data);
8006 /* Finds candidates for the induction variables (item 2). */
8007 find_iv_candidates (data);
8009 /* Calculates the costs (item 3, part 1). */
8010 determine_iv_costs (data);
8011 determine_group_iv_costs (data);
8012 determine_set_costs (data);
8014 /* Find the optimal set of induction variables (item 3, part 2). */
8015 iv_ca = find_optimal_iv_set (data);
8016 /* Cleanup basic block aux field. */
8017 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8018 body[i]->aux = NULL;
8019 if (!iv_ca)
8020 goto finish;
8021 changed = true;
8023 /* Create the new induction variables (item 4, part 1). */
8024 create_new_ivs (data, iv_ca);
8025 iv_ca_free (&iv_ca);
8027 /* Rewrite the uses (item 4, part 2). */
8028 rewrite_groups (data);
8030 /* Remove the ivs that are unused after rewriting. */
8031 remove_unused_ivs (data, toremove);
8033 finish:
8034 free (body);
8035 free_loop_data (data);
8037 return changed;
8040 /* Main entry point. Optimizes induction variables in loops. */
8042 void
8043 tree_ssa_iv_optimize (void)
8045 class loop *loop;
8046 struct ivopts_data data;
8047 auto_bitmap toremove;
8049 tree_ssa_iv_optimize_init (&data);
8051 /* Optimize the loops starting with the innermost ones. */
8052 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8054 if (!dbg_cnt (ivopts_loop))
8055 continue;
8057 if (dump_file && (dump_flags & TDF_DETAILS))
8058 flow_loop_dump (loop, dump_file, NULL, 1);
8060 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8063 /* Remove eliminated IV defs. */
8064 release_defs_bitset (toremove);
8066 /* We have changed the structure of induction variables; it might happen
8067 that definitions in the scev database refer to some of them that were
8068 eliminated. */
8069 scev_reset_htab ();
8070 /* Likewise niter and control-IV information. */
8071 free_numbers_of_iterations_estimates (cfun);
8073 tree_ssa_iv_optimize_finalize (&data);
8076 #include "gt-tree-ssa-loop-ivopts.h"