gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It proceeds in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note that the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop optimization,
71 trying to make the selected IV candidate set optimal. The process of doloop
72 support includes:
74 1) Analyze whether the current loop will be transformed into a doloop; if so,
75 find and mark its compare type IV use as a doloop use (iv_group field
76 doloop_p), and set flag doloop_use_p of ivopts_data to notify subsequent
77 processing on doloop. See analyze_and_mark_doloop_use and its callees for the
78 details. The target hook predict_doloop_p can be used for target-specific checks.
80 2) Add one doloop-dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1}
81 with flag doloop_p of iv_cand set; its step cost is set to zero and it has
82 no extra cost like a biv. For cost determination between a doloop IV cand
83 and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type
85 and address type IV uses. Zero cost is assigned to the pair of doloop IV
86 cand and doloop IV use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost-model-based IV
89 selection algorithm proceeds as usual and picks up the doloop-dedicated IV
90 if profitable. */
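/* As a rough illustration of steps 1) - 4) (a sketch, with names chosen only
   for the example), consider a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   It has an address type use for &a[i] and a compare type use for i < n, both
   based on the basic induction variable i.  If a pointer candidate is chosen,
   the loop is typically rewritten along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   i.e. the multiplication hidden in &a[i] is strength-reduced to a pointer
   increment, the exit test is expressed in terms of the new iv, and the
   original counter i is eliminated if it has no other uses.  The exact result
   depends on the target cost model.  */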
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
135 /* For lang_hooks.types.type_for_mode. */
136 #include "langhooks.h"
138 /* FIXME: Expressions are expanded to RTL in this pass to determine the
139 cost of different addressing modes. This should be moved to a TBD
140 interface between the GIMPLE and RTL worlds. */
142 /* The infinite cost. */
143 #define INFTY 1000000000
145 /* Returns the expected number of loop iterations for LOOP.
146 The average trip count is computed from profile data if it
147 exists. */
149 static inline HOST_WIDE_INT
150 avg_loop_niter (class loop *loop)
152 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
153 if (niter == -1)
155 niter = likely_max_stmt_executions_int (loop);
157 if (niter == -1 || niter > param_avg_loop_niter)
158 return param_avg_loop_niter;
161 return niter;
164 struct iv_use;
166 /* Representation of the induction variable. */
167 struct iv
169 tree base; /* Initial value of the iv. */
170 tree base_object; /* The memory object that the induction variable points to. */
171 tree step; /* Step of the iv (constant only). */
172 tree ssa_name; /* The ssa name with the value. */
173 struct iv_use *nonlin_use; /* The nonlinear use based on this iv, if any. */
174 bool biv_p; /* Is it a biv? */
175 bool no_overflow; /* True if the iv doesn't overflow. */
176 bool have_address_use;/* For biv, indicate if it's used in any address
177 type use. */
180 /* Per-ssa version information (induction variable descriptions, etc.). */
181 struct version_info
183 tree name; /* The ssa name. */
184 struct iv *iv; /* Induction variable description. */
185 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
186 an expression that is not an induction variable. */
187 bool preserve_biv; /* For the original biv, whether to preserve it. */
188 unsigned inv_id; /* Id of an invariant. */
191 /* Types of uses. */
192 enum use_type
194 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
195 USE_REF_ADDRESS, /* Use is an address for an explicit memory
196 reference. */
197 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
198 cases where the expansion of the function
199 will turn the argument into a normal address. */
200 USE_COMPARE /* Use is a compare. */
203 /* Cost of a computation. */
204 class comp_cost
206 public:
207 comp_cost (): cost (0), complexity (0), scratch (0)
210 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
211 : cost (cost), complexity (complexity), scratch (scratch)
214 /* Returns true if COST is infinite. */
215 bool infinite_cost_p ();
217 /* Adds costs COST1 and COST2. */
218 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
220 /* Adds COST to the comp_cost. */
221 comp_cost operator+= (comp_cost cost);
223 /* Adds constant C to this comp_cost. */
224 comp_cost operator+= (HOST_WIDE_INT c);
226 /* Subtracts constant C from this comp_cost. */
227 comp_cost operator-= (HOST_WIDE_INT c);
229 /* Divide the comp_cost by constant C. */
230 comp_cost operator/= (HOST_WIDE_INT c);
232 /* Multiply the comp_cost by constant C. */
233 comp_cost operator*= (HOST_WIDE_INT c);
235 /* Subtracts costs COST1 and COST2. */
236 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
238 /* Subtracts COST from this comp_cost. */
239 comp_cost operator-= (comp_cost cost);
241 /* Returns true if COST1 is smaller than COST2. */
242 friend bool operator< (comp_cost cost1, comp_cost cost2);
244 /* Returns true if COST1 and COST2 are equal. */
245 friend bool operator== (comp_cost cost1, comp_cost cost2);
247 /* Returns true if COST1 is smaller than or equal to COST2. */
248 friend bool operator<= (comp_cost cost1, comp_cost cost2);
250 int64_t cost; /* The runtime cost. */
251 unsigned complexity; /* The estimate of the complexity of the code for
252 the computation (in no concrete units --
253 complexity field should be larger for more
254 complex expressions and addressing modes). */
255 int64_t scratch; /* Scratch used during cost computation. */
258 static const comp_cost no_cost;
259 static const comp_cost infinite_cost (INFTY, 0, INFTY);
261 bool
262 comp_cost::infinite_cost_p ()
264 return cost == INFTY;
267 comp_cost
268 operator+ (comp_cost cost1, comp_cost cost2)
270 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
271 return infinite_cost;
273 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
274 cost1.cost += cost2.cost;
275 cost1.complexity += cost2.complexity;
277 return cost1;
280 comp_cost
281 operator- (comp_cost cost1, comp_cost cost2)
283 if (cost1.infinite_cost_p ())
284 return infinite_cost;
286 gcc_assert (!cost2.infinite_cost_p ());
287 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
289 cost1.cost -= cost2.cost;
290 cost1.complexity -= cost2.complexity;
292 return cost1;
295 comp_cost
296 comp_cost::operator+= (comp_cost cost)
298 *this = *this + cost;
299 return *this;
302 comp_cost
303 comp_cost::operator+= (HOST_WIDE_INT c)
305 if (c >= INFTY)
306 this->cost = INFTY;
308 if (infinite_cost_p ())
309 return *this;
311 gcc_assert (this->cost + c < infinite_cost.cost);
312 this->cost += c;
314 return *this;
317 comp_cost
318 comp_cost::operator-= (HOST_WIDE_INT c)
320 if (infinite_cost_p ())
321 return *this;
323 gcc_assert (this->cost - c < infinite_cost.cost);
324 this->cost -= c;
326 return *this;
329 comp_cost
330 comp_cost::operator/= (HOST_WIDE_INT c)
332 gcc_assert (c != 0);
333 if (infinite_cost_p ())
334 return *this;
336 this->cost /= c;
338 return *this;
341 comp_cost
342 comp_cost::operator*= (HOST_WIDE_INT c)
344 if (infinite_cost_p ())
345 return *this;
347 gcc_assert (this->cost * c < infinite_cost.cost);
348 this->cost *= c;
350 return *this;
353 comp_cost
354 comp_cost::operator-= (comp_cost cost)
356 *this = *this - cost;
357 return *this;
360 bool
361 operator< (comp_cost cost1, comp_cost cost2)
363 if (cost1.cost == cost2.cost)
364 return cost1.complexity < cost2.complexity;
366 return cost1.cost < cost2.cost;
369 bool
370 operator== (comp_cost cost1, comp_cost cost2)
372 return cost1.cost == cost2.cost
373 && cost1.complexity == cost2.complexity;
376 bool
377 operator<= (comp_cost cost1, comp_cost cost2)
379 return cost1 < cost2 || cost1 == cost2;
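/* For instance, under this ordering a comp_cost of {cost = 4, complexity = 1}
   compares smaller than {cost = 4, complexity = 2}, which in turn compares
   smaller than {cost = 5, complexity = 0}: complexity only breaks ties
   between equal runtime costs.  */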
382 struct iv_inv_expr_ent;
384 /* The candidate - cost pair. */
385 class cost_pair
387 public:
388 struct iv_cand *cand; /* The candidate. */
389 comp_cost cost; /* The cost. */
390 enum tree_code comp; /* For iv elimination, the comparison. */
391 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
392 preserved when representing iv_use with iv_cand. */
393 bitmap inv_exprs; /* The list of newly created invariant expressions
394 when representing iv_use with iv_cand. */
395 tree value; /* For final value elimination, the expression for
396 the final value of the iv. For iv elimination,
397 the new bound to compare with. */
400 /* Use. */
401 struct iv_use
403 unsigned id; /* The id of the use. */
404 unsigned group_id; /* The group id the use belongs to. */
405 enum use_type type; /* Type of the use. */
406 tree mem_type; /* The memory type to use when testing whether an
407 address is legitimate, and what the address's
408 cost is. */
409 struct iv *iv; /* The induction variable it is based on. */
410 gimple *stmt; /* Statement in that it occurs. */
411 tree *op_p; /* The place where it occurs. */
413 tree addr_base; /* Base address with const offset stripped. */
414 poly_uint64_pod addr_offset;
415 /* Const offset stripped from base address. */
418 /* Group of uses. */
419 struct iv_group
421 /* The id of the group. */
422 unsigned id;
423 /* Uses of the group are of the same type. */
424 enum use_type type;
425 /* The set of "related" IV candidates, plus the important ones. */
426 bitmap related_cands;
427 /* Number of IV candidates in the cost_map. */
428 unsigned n_map_members;
429 /* The costs w.r.t. the iv candidates. */
430 class cost_pair *cost_map;
431 /* The selected candidate for the group. */
432 struct iv_cand *selected;
433 /* To indicate this is a doloop use group. */
434 bool doloop_p;
435 /* Uses in the group. */
436 vec<struct iv_use *> vuses;
439 /* The position where the iv is computed. */
440 enum iv_position
442 IP_NORMAL, /* At the end, just before the exit condition. */
443 IP_END, /* At the end of the latch block. */
444 IP_BEFORE_USE, /* Immediately before a specific use. */
445 IP_AFTER_USE, /* Immediately after a specific use. */
446 IP_ORIGINAL /* The original biv. */
449 /* The induction variable candidate. */
450 struct iv_cand
452 unsigned id; /* The number of the candidate. */
453 bool important; /* Whether this is an "important" candidate, i.e. such
454 that it should be considered by all uses. */
455 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
456 gimple *incremented_at;/* For original biv, the statement where it is
457 incremented. */
458 tree var_before; /* The variable used for it before increment. */
459 tree var_after; /* The variable used for it after increment. */
460 struct iv *iv; /* The value of the candidate. NULL for
461 "pseudocandidate" used to indicate the possibility
462 to replace the final value of an iv by direct
463 computation of the value. */
464 unsigned cost; /* Cost of the candidate. */
465 unsigned cost_step; /* Cost of the candidate's increment operation. */
466 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
467 where it is incremented. */
468 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
469 iv_cand. */
470 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
471 handle it as a new invariant expression which will
472 be hoisted out of loop. */
473 struct iv *orig_iv; /* The original iv if this cand is added from biv with
474 smaller type. */
475 bool doloop_p; /* Whether this is a doloop candidate. */
478 /* Hashtable entry for common candidate derived from iv uses. */
479 class iv_common_cand
481 public:
482 tree base;
483 tree step;
484 /* IV uses from which this common candidate is derived. */
485 auto_vec<struct iv_use *> uses;
486 hashval_t hash;
489 /* Hashtable helpers. */
491 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
493 static inline hashval_t hash (const iv_common_cand *);
494 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
497 /* Hash function for possible common candidates. */
499 inline hashval_t
500 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
502 return ccand->hash;
505 /* Hash table equality function for common candidates. */
507 inline bool
508 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
509 const iv_common_cand *ccand2)
511 return (ccand1->hash == ccand2->hash
512 && operand_equal_p (ccand1->base, ccand2->base, 0)
513 && operand_equal_p (ccand1->step, ccand2->step, 0)
514 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
515 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
518 /* Loop invariant expression hashtable entry. */
520 struct iv_inv_expr_ent
522 /* Tree expression of the entry. */
523 tree expr;
524 /* Unique identifier. */
525 int id;
526 /* Hash value. */
527 hashval_t hash;
530 /* Sort iv_inv_expr_ent pair A and B by id field. */
532 static int
533 sort_iv_inv_expr_ent (const void *a, const void *b)
535 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
536 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
538 unsigned id1 = (*e1)->id;
539 unsigned id2 = (*e2)->id;
541 if (id1 < id2)
542 return -1;
543 else if (id1 > id2)
544 return 1;
545 else
546 return 0;
549 /* Hashtable helpers. */
551 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
553 static inline hashval_t hash (const iv_inv_expr_ent *);
554 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
557 /* Return true if uses of type TYPE represent some form of address. */
559 inline bool
560 address_p (use_type type)
562 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
565 /* Hash function for loop invariant expressions. */
567 inline hashval_t
568 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
570 return expr->hash;
573 /* Hash table equality function for expressions. */
575 inline bool
576 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
577 const iv_inv_expr_ent *expr2)
579 return expr1->hash == expr2->hash
580 && operand_equal_p (expr1->expr, expr2->expr, 0);
583 struct ivopts_data
585 /* The currently optimized loop. */
586 class loop *current_loop;
587 location_t loop_loc;
589 /* Numbers of iterations for all exits of the current loop. */
590 hash_map<edge, tree_niter_desc *> *niters;
592 /* Number of registers used in it. */
593 unsigned regs_used;
595 /* The size of version_info array allocated. */
596 unsigned version_info_size;
598 /* The array of information for the ssa names. */
599 struct version_info *version_info;
601 /* The hashtable of loop invariant expressions created
602 by ivopt. */
603 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
605 /* The bitmap of indices in version_info whose value was changed. */
606 bitmap relevant;
608 /* The uses of induction variables. */
609 vec<iv_group *> vgroups;
611 /* The candidates. */
612 vec<iv_cand *> vcands;
614 /* A bitmap of important candidates. */
615 bitmap important_candidates;
617 /* Cache used by tree_to_aff_combination_expand. */
618 hash_map<tree, name_expansion *> *name_expansion_cache;
620 /* The hashtable of common candidates derived from iv uses. */
621 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
623 /* The common candidates. */
624 vec<iv_common_cand *> iv_common_cands;
626 /* Hash map recording base object information of tree exp. */
627 hash_map<tree, tree> *base_object_map;
629 /* The maximum invariant variable id. */
630 unsigned max_inv_var_id;
632 /* The maximum invariant expression id. */
633 unsigned max_inv_expr_id;
635 /* Number of no_overflow BIVs which are not used in memory address. */
636 unsigned bivs_not_used_in_addr;
638 /* Obstack for iv structure. */
639 struct obstack iv_obstack;
641 /* Whether to consider just related and important candidates when replacing a
642 use. */
643 bool consider_all_candidates;
645 /* Are we optimizing for speed? */
646 bool speed;
648 /* Whether the loop body includes any function calls. */
649 bool body_includes_call;
651 /* Whether the loop body can only be exited via single exit. */
652 bool loop_single_exit_p;
654 /* Whether the loop has doloop comparison use. */
655 bool doloop_use_p;
658 /* An assignment of iv candidates to uses. */
660 class iv_ca
662 public:
663 /* The number of uses covered by the assignment. */
664 unsigned upto;
666 /* Number of uses that cannot be expressed by the candidates in the set. */
667 unsigned bad_groups;
669 /* Candidate assigned to a use, together with the related costs. */
670 class cost_pair **cand_for_group;
672 /* Number of times each candidate is used. */
673 unsigned *n_cand_uses;
675 /* The candidates used. */
676 bitmap cands;
678 /* The number of candidates in the set. */
679 unsigned n_cands;
681 /* The number of invariants needed, including both invariant variables and
682 invariant expressions. */
683 unsigned n_invs;
685 /* Total cost of expressing uses. */
686 comp_cost cand_use_cost;
688 /* Total cost of candidates. */
689 int64_t cand_cost;
691 /* Number of times each invariant variable is used. */
692 unsigned *n_inv_var_uses;
694 /* Number of times each invariant expression is used. */
695 unsigned *n_inv_expr_uses;
697 /* Total cost of the assignment. */
698 comp_cost cost;
701 /* Difference of two iv candidate assignments. */
703 struct iv_ca_delta
705 /* Changed group. */
706 struct iv_group *group;
708 /* An old assignment (for rollback purposes). */
709 class cost_pair *old_cp;
711 /* A new assignment. */
712 class cost_pair *new_cp;
714 /* Next change in the list. */
715 struct iv_ca_delta *next;
718 /* Bound on the number of candidates below which all candidates are considered. */
720 #define CONSIDER_ALL_CANDIDATES_BOUND \
721 ((unsigned) param_iv_consider_all_candidates_bound)
723 /* If there are more iv occurrences, we just give up (it is quite unlikely that
724 optimizing such a loop would help, and it would take ages). */
726 #define MAX_CONSIDERED_GROUPS \
727 ((unsigned) param_iv_max_considered_uses)
729 /* If there are at most this number of ivs in the set, try removing unnecessary
730 ivs from the set always. */
732 #define ALWAYS_PRUNE_CAND_SET_BOUND \
733 ((unsigned) param_iv_always_prune_cand_set_bound)
735 /* The list of trees for which the decl_rtl field must be reset is stored
736 here. */
738 static vec<tree> decl_rtl_to_reset;
740 static comp_cost force_expr_to_var_cost (tree, bool);
742 /* The single loop exit if it dominates the latch, NULL otherwise. */
744 edge
745 single_dom_exit (class loop *loop)
747 edge exit = single_exit (loop);
749 if (!exit)
750 return NULL;
752 if (!just_once_each_iteration_p (loop, exit->src))
753 return NULL;
755 return exit;
758 /* Dumps information about the induction variable IV to FILE. Don't dump
759 the variable's name if DUMP_NAME is FALSE. The information is dumped with
760 preceding spaces indicated by INDENT_LEVEL. */
762 void
763 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
765 const char *p;
766 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
768 if (indent_level > 4)
769 indent_level = 4;
770 p = spaces + 8 - (indent_level << 1);
772 fprintf (file, "%sIV struct:\n", p);
773 if (iv->ssa_name && dump_name)
775 fprintf (file, "%s SSA_NAME:\t", p);
776 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
777 fprintf (file, "\n");
780 fprintf (file, "%s Type:\t", p);
781 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
782 fprintf (file, "\n");
784 fprintf (file, "%s Base:\t", p);
785 print_generic_expr (file, iv->base, TDF_SLIM);
786 fprintf (file, "\n");
788 fprintf (file, "%s Step:\t", p);
789 print_generic_expr (file, iv->step, TDF_SLIM);
790 fprintf (file, "\n");
792 if (iv->base_object)
794 fprintf (file, "%s Object:\t", p);
795 print_generic_expr (file, iv->base_object, TDF_SLIM);
796 fprintf (file, "\n");
799 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
801 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
802 p, iv->no_overflow ? "No-overflow" : "Overflow");
805 /* Dumps information about the USE to FILE. */
807 void
808 dump_use (FILE *file, struct iv_use *use)
810 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
811 fprintf (file, " At stmt:\t");
812 print_gimple_stmt (file, use->stmt, 0);
813 fprintf (file, " At pos:\t");
814 if (use->op_p)
815 print_generic_expr (file, *use->op_p, TDF_SLIM);
816 fprintf (file, "\n");
817 dump_iv (file, use->iv, false, 2);
820 /* Dumps information about the uses to FILE. */
822 void
823 dump_groups (FILE *file, struct ivopts_data *data)
825 unsigned i, j;
826 struct iv_group *group;
828 for (i = 0; i < data->vgroups.length (); i++)
830 group = data->vgroups[i];
831 fprintf (file, "Group %d:\n", group->id);
832 if (group->type == USE_NONLINEAR_EXPR)
833 fprintf (file, " Type:\tGENERIC\n");
834 else if (group->type == USE_REF_ADDRESS)
835 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
836 else if (group->type == USE_PTR_ADDRESS)
837 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
838 else
840 gcc_assert (group->type == USE_COMPARE);
841 fprintf (file, " Type:\tCOMPARE\n");
843 for (j = 0; j < group->vuses.length (); j++)
844 dump_use (file, group->vuses[j]);
848 /* Dumps information about induction variable candidate CAND to FILE. */
850 void
851 dump_cand (FILE *file, struct iv_cand *cand)
853 struct iv *iv = cand->iv;
855 fprintf (file, "Candidate %d:\n", cand->id);
856 if (cand->inv_vars)
858 fprintf (file, " Depend on inv.vars: ");
859 dump_bitmap (file, cand->inv_vars);
861 if (cand->inv_exprs)
863 fprintf (file, " Depend on inv.exprs: ");
864 dump_bitmap (file, cand->inv_exprs);
867 if (cand->var_before)
869 fprintf (file, " Var befor: ");
870 print_generic_expr (file, cand->var_before, TDF_SLIM);
871 fprintf (file, "\n");
873 if (cand->var_after)
875 fprintf (file, " Var after: ");
876 print_generic_expr (file, cand->var_after, TDF_SLIM);
877 fprintf (file, "\n");
880 switch (cand->pos)
882 case IP_NORMAL:
883 fprintf (file, " Incr POS: before exit test\n");
884 break;
886 case IP_BEFORE_USE:
887 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
888 break;
890 case IP_AFTER_USE:
891 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
892 break;
894 case IP_END:
895 fprintf (file, " Incr POS: at end\n");
896 break;
898 case IP_ORIGINAL:
899 fprintf (file, " Incr POS: orig biv\n");
900 break;
903 dump_iv (file, iv, false, 1);
906 /* Returns the info for ssa version VER. */
908 static inline struct version_info *
909 ver_info (struct ivopts_data *data, unsigned ver)
911 return data->version_info + ver;
914 /* Returns the info for ssa name NAME. */
916 static inline struct version_info *
917 name_info (struct ivopts_data *data, tree name)
919 return ver_info (data, SSA_NAME_VERSION (name));
922 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
923 emitted in LOOP. */
925 static bool
926 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
928 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
930 gcc_assert (bb);
932 if (sbb == loop->latch)
933 return true;
935 if (sbb != bb)
936 return false;
938 return stmt == last_stmt (bb);
941 /* Returns true if STMT is after the place where the original induction
942 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
943 if the positions are identical. */
945 static bool
946 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
948 basic_block cand_bb = gimple_bb (cand->incremented_at);
949 basic_block stmt_bb = gimple_bb (stmt);
951 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
952 return false;
954 if (stmt_bb != cand_bb)
955 return true;
957 if (true_if_equal
958 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
959 return true;
960 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
963 /* Returns true if STMT is after the place where the induction variable
964 CAND is incremented in LOOP. */
966 static bool
967 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
969 switch (cand->pos)
971 case IP_END:
972 return false;
974 case IP_NORMAL:
975 return stmt_after_ip_normal_pos (loop, stmt);
977 case IP_ORIGINAL:
978 case IP_AFTER_USE:
979 return stmt_after_inc_pos (cand, stmt, false);
981 case IP_BEFORE_USE:
982 return stmt_after_inc_pos (cand, stmt, true);
984 default:
985 gcc_unreachable ();
989 /* walk_tree callback for contains_abnormal_ssa_name_p. */
991 static tree
992 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
994 if (TREE_CODE (*tp) == SSA_NAME
995 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
996 return *tp;
998 if (!EXPR_P (*tp))
999 *walk_subtrees = 0;
1001 return NULL_TREE;
1004 /* Returns true if EXPR contains a ssa name that occurs in an
1005 abnormal phi node. */
1007 bool
1008 contains_abnormal_ssa_name_p (tree expr)
1010 return walk_tree_without_duplicates
1011 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1014 /* Returns the structure describing number of iterations determined from
1015 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1017 static class tree_niter_desc *
1018 niter_for_exit (struct ivopts_data *data, edge exit)
1020 class tree_niter_desc *desc;
1021 tree_niter_desc **slot;
1023 if (!data->niters)
1025 data->niters = new hash_map<edge, tree_niter_desc *>;
1026 slot = NULL;
1028 else
1029 slot = data->niters->get (exit);
1031 if (!slot)
1033 /* Try to determine number of iterations. We cannot safely work with ssa
1034 names that appear in phi nodes on abnormal edges, so that we do not
1035 create overlapping life ranges for them (PR 27283). */
1036 desc = XNEW (class tree_niter_desc);
1037 if (!number_of_iterations_exit (data->current_loop,
1038 exit, desc, true)
1039 || contains_abnormal_ssa_name_p (desc->niter))
1041 XDELETE (desc);
1042 desc = NULL;
1044 data->niters->put (exit, desc);
1046 else
1047 desc = *slot;
1049 return desc;
1052 /* Returns the structure describing number of iterations determined from
1053 single dominating exit of DATA->current_loop, or NULL if something
1054 goes wrong. */
1056 static class tree_niter_desc *
1057 niter_for_single_dom_exit (struct ivopts_data *data)
1059 edge exit = single_dom_exit (data->current_loop);
1061 if (!exit)
1062 return NULL;
1064 return niter_for_exit (data, exit);
1067 /* Initializes data structures used by the iv optimization pass, stored
1068 in DATA. */
1070 static void
1071 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1073 data->version_info_size = 2 * num_ssa_names;
1074 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1075 data->relevant = BITMAP_ALLOC (NULL);
1076 data->important_candidates = BITMAP_ALLOC (NULL);
1077 data->max_inv_var_id = 0;
1078 data->max_inv_expr_id = 0;
1079 data->niters = NULL;
1080 data->vgroups.create (20);
1081 data->vcands.create (20);
1082 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1083 data->name_expansion_cache = NULL;
1084 data->base_object_map = NULL;
1085 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1086 data->iv_common_cands.create (20);
1087 decl_rtl_to_reset.create (20);
1088 gcc_obstack_init (&data->iv_obstack);
1091 /* walk_tree callback for determine_base_object. */
1093 static tree
1094 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1096 tree_code code = TREE_CODE (*tp);
1097 tree obj = NULL_TREE;
1098 if (code == ADDR_EXPR)
1100 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1101 if (!base)
1102 obj = *tp;
1103 else if (TREE_CODE (base) != MEM_REF)
1104 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1106 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1107 obj = fold_convert (ptr_type_node, *tp);
1109 if (!obj)
1111 if (!EXPR_P (*tp))
1112 *walk_subtrees = 0;
1114 return NULL_TREE;
1116 /* Record special node for multiple base objects and stop. */
1117 if (*static_cast<tree *> (wdata))
1119 *static_cast<tree *> (wdata) = integer_zero_node;
1120 return integer_zero_node;
1122 /* Record the base object and continue looking. */
1123 *static_cast<tree *> (wdata) = obj;
1124 return NULL_TREE;
1127 /* Returns the memory object that EXPR points to, with caching. Returns NULL
1128 if we are able to determine that it does not point to any such object; as a
1129 special case, returns integer_zero_node if EXPR contains multiple base objects. */
1131 static tree
1132 determine_base_object (struct ivopts_data *data, tree expr)
1134 tree *slot, obj = NULL_TREE;
1135 if (data->base_object_map)
1137 if ((slot = data->base_object_map->get(expr)) != NULL)
1138 return *slot;
1140 else
1141 data->base_object_map = new hash_map<tree, tree>;
1143 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1144 data->base_object_map->put (expr, obj);
1145 return obj;
1148 /* Return true if an address expression with a non-DECL_P operand appears
1149 in EXPR. */
1151 static bool
1152 contain_complex_addr_expr (tree expr)
1154 bool res = false;
1156 STRIP_NOPS (expr);
1157 switch (TREE_CODE (expr))
1159 case POINTER_PLUS_EXPR:
1160 case PLUS_EXPR:
1161 case MINUS_EXPR:
1162 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1163 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1164 break;
1166 case ADDR_EXPR:
1167 return (!DECL_P (TREE_OPERAND (expr, 0)));
1169 default:
1170 return false;
1173 return res;
1176 /* Allocates an induction variable with given initial value BASE and step STEP
1177 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1179 static struct iv *
1180 alloc_iv (struct ivopts_data *data, tree base, tree step,
1181 bool no_overflow = false)
1183 tree expr = base;
1184 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1185 sizeof (struct iv));
1186 gcc_assert (step != NULL_TREE);
1188 /* Lower address expression in base except ones with DECL_P as operand.
1189 By doing this:
1190 1) More accurate cost can be computed for address expressions;
1191 2) Duplicate candidates won't be created for bases in different
1192 forms, like &a[0] and &a. */
1193 STRIP_NOPS (expr);
1194 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1195 || contain_complex_addr_expr (expr))
1197 aff_tree comb;
1198 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1199 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1202 iv->base = base;
1203 iv->base_object = determine_base_object (data, base);
1204 iv->step = step;
1205 iv->biv_p = false;
1206 iv->nonlin_use = NULL;
1207 iv->ssa_name = NULL_TREE;
1208 if (!no_overflow
1209 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1210 base, step))
1211 no_overflow = true;
1212 iv->no_overflow = no_overflow;
1213 iv->have_address_use = false;
1215 return iv;
1218 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1219 doesn't overflow. */
1221 static void
1222 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1223 bool no_overflow)
1225 struct version_info *info = name_info (data, iv);
1227 gcc_assert (!info->iv);
1229 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1230 info->iv = alloc_iv (data, base, step, no_overflow);
1231 info->iv->ssa_name = iv;
1234 /* Finds induction variable declaration for VAR. */
1236 static struct iv *
1237 get_iv (struct ivopts_data *data, tree var)
1239 basic_block bb;
1240 tree type = TREE_TYPE (var);
1242 if (!POINTER_TYPE_P (type)
1243 && !INTEGRAL_TYPE_P (type))
1244 return NULL;
1246 if (!name_info (data, var)->iv)
1248 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1250 if (!bb
1251 || !flow_bb_inside_loop_p (data->current_loop, bb))
1253 if (POINTER_TYPE_P (type))
1254 type = sizetype;
1255 set_iv (data, var, var, build_int_cst (type, 0), true);
1259 return name_info (data, var)->iv;
1262 /* Return the first non-invariant ssa var found in EXPR. */
1264 static tree
1265 extract_single_var_from_expr (tree expr)
1267 int i, n;
1268 tree tmp;
1269 enum tree_code code;
1271 if (!expr || is_gimple_min_invariant (expr))
1272 return NULL;
1274 code = TREE_CODE (expr);
1275 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1277 n = TREE_OPERAND_LENGTH (expr);
1278 for (i = 0; i < n; i++)
1280 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1282 if (tmp)
1283 return tmp;
1286 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1289 /* Finds basic ivs. */
1291 static bool
1292 find_bivs (struct ivopts_data *data)
1294 gphi *phi;
1295 affine_iv iv;
1296 tree step, type, base, stop;
1297 bool found = false;
1298 class loop *loop = data->current_loop;
1299 gphi_iterator psi;
1301 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1303 phi = psi.phi ();
1305 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1306 continue;
1308 if (virtual_operand_p (PHI_RESULT (phi)))
1309 continue;
1311 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1312 continue;
1314 if (integer_zerop (iv.step))
1315 continue;
1317 step = iv.step;
1318 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1319 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1320 Ideally we should stop at any ssa var, but because that's expensive
1321 and such cases are unusual, we just do it for the first one.
1323 See PR64705 for the rationale. */
1324 stop = extract_single_var_from_expr (step);
1325 base = expand_simple_operations (base, stop);
1326 if (contains_abnormal_ssa_name_p (base)
1327 || contains_abnormal_ssa_name_p (step))
1328 continue;
1330 type = TREE_TYPE (PHI_RESULT (phi));
1331 base = fold_convert (type, base);
1332 if (step)
1334 if (POINTER_TYPE_P (type))
1335 step = convert_to_ptrofftype (step);
1336 else
1337 step = fold_convert (type, step);
1340 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1341 found = true;
1344 return found;
1347 /* Marks basic ivs. */
1349 static void
1350 mark_bivs (struct ivopts_data *data)
1352 gphi *phi;
1353 gimple *def;
1354 tree var;
1355 struct iv *iv, *incr_iv;
1356 class loop *loop = data->current_loop;
1357 basic_block incr_bb;
1358 gphi_iterator psi;
1360 data->bivs_not_used_in_addr = 0;
1361 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1363 phi = psi.phi ();
1365 iv = get_iv (data, PHI_RESULT (phi));
1366 if (!iv)
1367 continue;
1369 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1370 def = SSA_NAME_DEF_STMT (var);
1371 /* Don't mark an iv peeled from another one as a biv. */
1372 if (def
1373 && gimple_code (def) == GIMPLE_PHI
1374 && gimple_bb (def) == loop->header)
1375 continue;
1377 incr_iv = get_iv (data, var);
1378 if (!incr_iv)
1379 continue;
1381 /* If the increment is in the subloop, ignore it. */
1382 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1383 if (incr_bb->loop_father != data->current_loop
1384 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1385 continue;
1387 iv->biv_p = true;
1388 incr_iv->biv_p = true;
1389 if (iv->no_overflow)
1390 data->bivs_not_used_in_addr++;
1391 if (incr_iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1396 /* Checks whether STMT defines a linear induction variable and stores its
1397 parameters to IV. */
1399 static bool
1400 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1402 tree lhs, stop;
1403 class loop *loop = data->current_loop;
1405 iv->base = NULL_TREE;
1406 iv->step = NULL_TREE;
1408 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1409 return false;
1411 lhs = gimple_assign_lhs (stmt);
1412 if (TREE_CODE (lhs) != SSA_NAME)
1413 return false;
1415 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1416 return false;
1418 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1419 Ideally we should stop at any ssa var, but because that's expensive
1420 and such cases are unusual, we just do it for the first one.
1422 See PR64705 for the rationale. */
1423 stop = extract_single_var_from_expr (iv->step);
1424 iv->base = expand_simple_operations (iv->base, stop);
1425 if (contains_abnormal_ssa_name_p (iv->base)
1426 || contains_abnormal_ssa_name_p (iv->step))
1427 return false;
1429 /* If STMT could throw, then do not consider STMT as defining a GIV.
1430 While this will suppress optimizations, we cannot safely delete this
1431 GIV and associated statements, even if it appears it is not used. */
1432 if (stmt_could_throw_p (cfun, stmt))
1433 return false;
1435 return true;
1438 /* Finds general ivs in statement STMT. */
1440 static void
1441 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1443 affine_iv iv;
1445 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1446 return;
1448 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1451 /* Finds general ivs in basic block BB. */
1453 static void
1454 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1456 gimple_stmt_iterator bsi;
1458 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1459 find_givs_in_stmt (data, gsi_stmt (bsi));
1462 /* Finds general ivs. */
1464 static void
1465 find_givs (struct ivopts_data *data, basic_block *body)
1467 class loop *loop = data->current_loop;
1468 unsigned i;
1470 for (i = 0; i < loop->num_nodes; i++)
1471 find_givs_in_bb (data, body[i]);
1474 /* For each ssa name defined in LOOP determines whether it is an induction
1475 variable and if so, its initial value and step. */
1477 static bool
1478 find_induction_variables (struct ivopts_data *data, basic_block *body)
1480 unsigned i;
1481 bitmap_iterator bi;
1483 if (!find_bivs (data))
1484 return false;
1486 find_givs (data, body);
1487 mark_bivs (data);
1489 if (dump_file && (dump_flags & TDF_DETAILS))
1491 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1493 if (niter)
1495 fprintf (dump_file, " number of iterations ");
1496 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1497 if (!integer_zerop (niter->may_be_zero))
1499 fprintf (dump_file, "; zero if ");
1500 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1502 fprintf (dump_file, "\n");
1505 fprintf (dump_file, "\n<Induction Vars>:\n");
1506 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1508 struct version_info *info = ver_info (data, i);
1509 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1510 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1514 return true;
1517 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1518 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1519 is the const offset stripped from IV base and MEM_TYPE is the type
1520 of the memory being addressed. For uses of other types, ADDR_BASE
1521 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1523 static struct iv_use *
1524 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1525 gimple *stmt, enum use_type type, tree mem_type,
1526 tree addr_base, poly_uint64 addr_offset)
1528 struct iv_use *use = XCNEW (struct iv_use);
1530 use->id = group->vuses.length ();
1531 use->group_id = group->id;
1532 use->type = type;
1533 use->mem_type = mem_type;
1534 use->iv = iv;
1535 use->stmt = stmt;
1536 use->op_p = use_p;
1537 use->addr_base = addr_base;
1538 use->addr_offset = addr_offset;
1540 group->vuses.safe_push (use);
1541 return use;
1544 /* Checks whether OP is a loop-level invariant and if so, records it.
1545 NONLINEAR_USE is true if the invariant is used in a way we do not
1546 handle specially. */
1548 static void
1549 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1551 basic_block bb;
1552 struct version_info *info;
1554 if (TREE_CODE (op) != SSA_NAME
1555 || virtual_operand_p (op))
1556 return;
1558 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1559 if (bb
1560 && flow_bb_inside_loop_p (data->current_loop, bb))
1561 return;
1563 info = name_info (data, op);
1564 info->name = op;
1565 info->has_nonlin_use |= nonlinear_use;
1566 if (!info->inv_id)
1567 info->inv_id = ++data->max_inv_var_id;
1568 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1571 /* Record a group of TYPE. */
1573 static struct iv_group *
1574 record_group (struct ivopts_data *data, enum use_type type)
1576 struct iv_group *group = XCNEW (struct iv_group);
1578 group->id = data->vgroups.length ();
1579 group->type = type;
1580 group->related_cands = BITMAP_ALLOC (NULL);
1581 group->vuses.create (1);
1582 group->doloop_p = false;
1584 data->vgroups.safe_push (group);
1585 return group;
1588 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1589 New group will be created if there is no existing group for the use.
1590 MEM_TYPE is the type of memory being addressed, or NULL if this
1591 isn't an address reference. */
1593 static struct iv_use *
1594 record_group_use (struct ivopts_data *data, tree *use_p,
1595 struct iv *iv, gimple *stmt, enum use_type type,
1596 tree mem_type)
1598 tree addr_base = NULL;
1599 struct iv_group *group = NULL;
1600 poly_uint64 addr_offset = 0;
1602 /* Record non address type use in a new group. */
1603 if (address_p (type))
1605 unsigned int i;
1607 addr_base = strip_offset (iv->base, &addr_offset);
1608 for (i = 0; i < data->vgroups.length (); i++)
1610 struct iv_use *use;
1612 group = data->vgroups[i];
1613 use = group->vuses[0];
1614 if (!address_p (use->type))
1615 continue;
1617 /* Check if it has the same stripped base and step. */
1618 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1619 && operand_equal_p (iv->step, use->iv->step, 0)
1620 && operand_equal_p (addr_base, use->addr_base, 0))
1621 break;
1623 if (i == data->vgroups.length ())
1624 group = NULL;
1627 if (!group)
1628 group = record_group (data, type);
1630 return record_use (group, use_p, iv, stmt, type, mem_type,
1631 addr_base, addr_offset);
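/* For instance, address uses a[i] and a[i + 4] in the same loop share the
   base object, the step and the stripped base, and differ only in
   ADDR_OFFSET, so record_group_use puts them into one group; an access to a
   different array b[i] gets a group of its own.  */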
1634 /* Checks whether the use OP is interesting and if so, records it. */
1636 static struct iv_use *
1637 find_interesting_uses_op (struct ivopts_data *data, tree op)
1639 struct iv *iv;
1640 gimple *stmt;
1641 struct iv_use *use;
1643 if (TREE_CODE (op) != SSA_NAME)
1644 return NULL;
1646 iv = get_iv (data, op);
1647 if (!iv)
1648 return NULL;
1650 if (iv->nonlin_use)
1652 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1653 return iv->nonlin_use;
1656 if (integer_zerop (iv->step))
1658 record_invariant (data, op, true);
1659 return NULL;
1662 stmt = SSA_NAME_DEF_STMT (op);
1663 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1665 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1666 iv->nonlin_use = use;
1667 return use;
1670 /* Indicate how compare type iv_use can be handled. */
1671 enum comp_iv_rewrite
1673 COMP_IV_NA,
1674 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1675 COMP_IV_EXPR,
1676 /* We may rewrite compare type iv_uses on both sides of comparison by
1677 expressing value of each iv_use. */
1678 COMP_IV_EXPR_2,
1679 /* We may rewrite compare type iv_use by expressing value of the iv_use
1680 or by eliminating it with other iv_cand. */
1681 COMP_IV_ELIM
1684 /* Given a condition in statement STMT, checks whether it is a compare
1685 of an induction variable and an invariant. If this is the case,
1686 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1687 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1688 induction variable descriptions, and the way the condition may be
1689 rewritten is returned. Otherwise, CONTROL_VAR and BOUND are set to the
1690 arguments of the condition and COMP_IV_NA is returned. */
1692 static enum comp_iv_rewrite
1693 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1694 tree **control_var, tree **bound,
1695 struct iv **iv_var, struct iv **iv_bound)
1697 /* The objects returned when COND has constant operands. */
1698 static struct iv const_iv;
1699 static tree zero;
1700 tree *op0 = &zero, *op1 = &zero;
1701 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1702 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1704 if (gimple_code (stmt) == GIMPLE_COND)
1706 gcond *cond_stmt = as_a <gcond *> (stmt);
1707 op0 = gimple_cond_lhs_ptr (cond_stmt);
1708 op1 = gimple_cond_rhs_ptr (cond_stmt);
1710 else
1712 op0 = gimple_assign_rhs1_ptr (stmt);
1713 op1 = gimple_assign_rhs2_ptr (stmt);
1716 zero = integer_zero_node;
1717 const_iv.step = integer_zero_node;
1719 if (TREE_CODE (*op0) == SSA_NAME)
1720 iv0 = get_iv (data, *op0);
1721 if (TREE_CODE (*op1) == SSA_NAME)
1722 iv1 = get_iv (data, *op1);
1724 /* If both sides of the comparison are IVs, we can express ivs on both ends. */
1725 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1727 rewrite_type = COMP_IV_EXPR_2;
1728 goto end;
1731 /* If neither side of the comparison is an IV. */
1732 if ((!iv0 || integer_zerop (iv0->step))
1733 && (!iv1 || integer_zerop (iv1->step)))
1734 goto end;
1736 /* Control variable may be on the other side. */
1737 if (!iv0 || integer_zerop (iv0->step))
1739 std::swap (op0, op1);
1740 std::swap (iv0, iv1);
1742 /* If one side is IV and the other side isn't loop invariant. */
1743 if (!iv1)
1744 rewrite_type = COMP_IV_EXPR;
1745 /* If one side is IV and the other side is loop invariant. */
1746 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1747 rewrite_type = COMP_IV_ELIM;
1749 end:
1750 if (control_var)
1751 *control_var = op0;
1752 if (iv_var)
1753 *iv_var = iv0;
1754 if (bound)
1755 *bound = op1;
1756 if (iv_bound)
1757 *iv_bound = iv1;
1759 return rewrite_type;
1762 /* Checks whether the condition in STMT is interesting and if so,
1763 records it. */
1765 static void
1766 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1768 tree *var_p, *bound_p;
1769 struct iv *var_iv, *bound_iv;
1770 enum comp_iv_rewrite ret;
1772 ret = extract_cond_operands (data, stmt,
1773 &var_p, &bound_p, &var_iv, &bound_iv);
1774 if (ret == COMP_IV_NA)
1776 find_interesting_uses_op (data, *var_p);
1777 find_interesting_uses_op (data, *bound_p);
1778 return;
1781 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1782 /* Record compare type iv_use for iv on the other side of comparison. */
1783 if (ret == COMP_IV_EXPR_2)
1784 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1787 /* Returns the outermost loop in which EXPR is obviously invariant,
1788 relative to the loop LOOP, i.e. all its operands are defined
1789 outside of the returned loop. Returns NULL if EXPR is not
1790 even obviously invariant in LOOP. */
1792 class loop *
1793 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1795 basic_block def_bb;
1796 unsigned i, len;
1798 if (is_gimple_min_invariant (expr))
1799 return current_loops->tree_root;
1801 if (TREE_CODE (expr) == SSA_NAME)
1803 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1804 if (def_bb)
1806 if (flow_bb_inside_loop_p (loop, def_bb))
1807 return NULL;
1808 return superloop_at_depth (loop,
1809 loop_depth (def_bb->loop_father) + 1);
1812 return current_loops->tree_root;
1815 if (!EXPR_P (expr))
1816 return NULL;
1818 unsigned maxdepth = 0;
1819 len = TREE_OPERAND_LENGTH (expr);
1820 for (i = 0; i < len; i++)
1822 class loop *ivloop;
1823 if (!TREE_OPERAND (expr, i))
1824 continue;
1826 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1827 if (!ivloop)
1828 return NULL;
1829 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1832 return superloop_at_depth (loop, maxdepth);
1835 /* Returns true if expression EXPR is obviously invariant in LOOP,
1836 i.e. if all its operands are defined outside of the LOOP. LOOP
1837 should not be the function body. */
1839 bool
1840 expr_invariant_in_loop_p (class loop *loop, tree expr)
1842 basic_block def_bb;
1843 unsigned i, len;
1845 gcc_assert (loop_depth (loop) > 0);
1847 if (is_gimple_min_invariant (expr))
1848 return true;
1850 if (TREE_CODE (expr) == SSA_NAME)
1852 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1853 if (def_bb
1854 && flow_bb_inside_loop_p (loop, def_bb))
1855 return false;
1857 return true;
1860 if (!EXPR_P (expr))
1861 return false;
1863 len = TREE_OPERAND_LENGTH (expr);
1864 for (i = 0; i < len; i++)
1865 if (TREE_OPERAND (expr, i)
1866 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1867 return false;
1869 return true;
1872 /* Given expression EXPR which computes inductive values with respect
1873 to loop recorded in DATA, this function returns biv from which EXPR
1874 is derived by tracing definition chains of ssa variables in EXPR. */
1876 static struct iv*
1877 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1879 struct iv *iv;
1880 unsigned i, n;
1881 tree e2, e1;
1882 enum tree_code code;
1883 gimple *stmt;
1885 if (expr == NULL_TREE)
1886 return NULL;
1888 if (is_gimple_min_invariant (expr))
1889 return NULL;
1891 code = TREE_CODE (expr);
1892 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1894 n = TREE_OPERAND_LENGTH (expr);
1895 for (i = 0; i < n; i++)
1897 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1898 if (iv)
1899 return iv;
1903 /* Stop if it's not ssa name. */
1904 if (code != SSA_NAME)
1905 return NULL;
1907 iv = get_iv (data, expr);
1908 if (!iv || integer_zerop (iv->step))
1909 return NULL;
1910 else if (iv->biv_p)
1911 return iv;
1913 stmt = SSA_NAME_DEF_STMT (expr);
1914 if (gphi *phi = dyn_cast <gphi *> (stmt))
1916 ssa_op_iter iter;
1917 use_operand_p use_p;
1918 basic_block phi_bb = gimple_bb (phi);
1920 /* Skip loop header PHI that doesn't define biv. */
1921 if (phi_bb->loop_father == data->current_loop)
1922 return NULL;
1924 if (virtual_operand_p (gimple_phi_result (phi)))
1925 return NULL;
1927 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1929 tree use = USE_FROM_PTR (use_p);
1930 iv = find_deriving_biv_for_expr (data, use);
1931 if (iv)
1932 return iv;
1934 return NULL;
1936 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1937 return NULL;
1939 e1 = gimple_assign_rhs1 (stmt);
1940 code = gimple_assign_rhs_code (stmt);
1941 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1942 return find_deriving_biv_for_expr (data, e1);
1944 switch (code)
1946 case MULT_EXPR:
1947 case PLUS_EXPR:
1948 case MINUS_EXPR:
1949 case POINTER_PLUS_EXPR:
1950 /* Increments, decrements and multiplications by a constant
1951 are simple. */
1952 e2 = gimple_assign_rhs2 (stmt);
1953 iv = find_deriving_biv_for_expr (data, e2);
1954 if (iv)
1955 return iv;
1956 gcc_fallthrough ();
1958 CASE_CONVERT:
1959 /* Casts are simple. */
1960 return find_deriving_biv_for_expr (data, e1);
1962 default:
1963 break;
1966 return NULL;
1969 /* Record that BIV, its predecessor and its successor are used in
1970 address type uses. */
1972 static void
1973 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1975 unsigned i;
1976 tree type, base_1, base_2;
1977 bitmap_iterator bi;
1979 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1980 || biv->have_address_use || !biv->no_overflow)
1981 return;
1983 type = TREE_TYPE (biv->base);
1984 if (!INTEGRAL_TYPE_P (type))
1985 return;
1987 biv->have_address_use = true;
1988 data->bivs_not_used_in_addr--;
1989 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1990 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1992 struct iv *iv = ver_info (data, i)->iv;
1994 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1995 || iv->have_address_use || !iv->no_overflow)
1996 continue;
1998 if (type != TREE_TYPE (iv->base)
1999 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2000 continue;
2002 if (!operand_equal_p (biv->step, iv->step, 0))
2003 continue;
2005 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2006 if (operand_equal_p (base_1, iv->base, 0)
2007 || operand_equal_p (base_2, biv->base, 0))
2009 iv->have_address_use = true;
2010 data->bivs_not_used_in_addr--;
2015 /* Cumulates the steps of indices into DATA and replaces their values with the
2016 initial ones. Returns false when the value of the index cannot be determined.
2017 Callback for for_each_index. */
2019 struct ifs_ivopts_data
2021 struct ivopts_data *ivopts_data;
2022 gimple *stmt;
2023 tree step;
2026 static bool
2027 idx_find_step (tree base, tree *idx, void *data)
2029 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2030 struct iv *iv;
2031 bool use_overflow_semantics = false;
2032 tree step, iv_base, iv_step, lbound, off;
2033 class loop *loop = dta->ivopts_data->current_loop;
2035 /* If base is a component ref, require that the offset of the reference
2036 be invariant. */
2037 if (TREE_CODE (base) == COMPONENT_REF)
2039 off = component_ref_field_offset (base);
2040 return expr_invariant_in_loop_p (loop, off);
2043 /* If base is array, first check whether we will be able to move the
2044 reference out of the loop (in order to take its address in strength
2045 reduction). In order for this to work we need both lower bound
2046 and step to be loop invariants. */
2047 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2049 /* Moreover, for a range, the size needs to be invariant as well. */
2050 if (TREE_CODE (base) == ARRAY_RANGE_REF
2051 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2052 return false;
2054 step = array_ref_element_size (base);
2055 lbound = array_ref_low_bound (base);
2057 if (!expr_invariant_in_loop_p (loop, step)
2058 || !expr_invariant_in_loop_p (loop, lbound))
2059 return false;
2062 if (TREE_CODE (*idx) != SSA_NAME)
2063 return true;
2065 iv = get_iv (dta->ivopts_data, *idx);
2066 if (!iv)
2067 return false;
2069 /* XXX For a base of *D42 with iv->base being &x[0] we produce
2070 *&x[0], which is not folded and does not trigger the
2071 ARRAY_REF path below. */
2072 *idx = iv->base;
2074 if (integer_zerop (iv->step))
2075 return true;
2077 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2079 step = array_ref_element_size (base);
2081 /* We only handle addresses whose step is an integer constant. */
2082 if (TREE_CODE (step) != INTEGER_CST)
2083 return false;
2085 else
2086 /* The step for pointer arithmetic is already 1 byte. */
2087 step = size_one_node;
2089 iv_base = iv->base;
2090 iv_step = iv->step;
2091 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2092 use_overflow_semantics = true;
2094 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2095 sizetype, &iv_base, &iv_step, dta->stmt,
2096 use_overflow_semantics))
2098 /* The index might wrap. */
2099 return false;
2102 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2103 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2105 if (dta->ivopts_data->bivs_not_used_in_addr)
2107 if (!iv->biv_p)
2108 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2110 record_biv_for_address_use (dta->ivopts_data, iv);
2112 return true;
2115 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2116 object is passed to it in DATA. */
2118 static bool
2119 idx_record_use (tree base, tree *idx,
2120 void *vdata)
2122 struct ivopts_data *data = (struct ivopts_data *) vdata;
2123 find_interesting_uses_op (data, *idx);
2124 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2126 find_interesting_uses_op (data, array_ref_element_size (base));
2127 find_interesting_uses_op (data, array_ref_low_bound (base));
2129 return true;
2132 /* If we can prove that TOP = cst * BOT for some constant cst,
2133 store cst to MUL and return true. Otherwise return false.
2134 The returned value is always sign-extended, regardless of the
2135 signedness of TOP and BOT. */
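/* For instance, with TOP = i * 4 and BOT = i the function stores 4 to MUL
   and returns true.  With TOP = i * 4 + 8 and BOT = i it returns false,
   since the constant term 8 is only recognized as a multiple of BOT when
   BOT itself is an INTEGER_CST.  */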
2137 static bool
2138 constant_multiple_of (tree top, tree bot, widest_int *mul)
2140 tree mby;
2141 enum tree_code code;
2142 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2143 widest_int res, p0, p1;
2145 STRIP_NOPS (top);
2146 STRIP_NOPS (bot);
2148 if (operand_equal_p (top, bot, 0))
2150 *mul = 1;
2151 return true;
2154 code = TREE_CODE (top);
2155 switch (code)
2157 case MULT_EXPR:
2158 mby = TREE_OPERAND (top, 1);
2159 if (TREE_CODE (mby) != INTEGER_CST)
2160 return false;
2162 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2163 return false;
2165 *mul = wi::sext (res * wi::to_widest (mby), precision);
2166 return true;
2168 case PLUS_EXPR:
2169 case MINUS_EXPR:
2170 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2171 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2172 return false;
2174 if (code == MINUS_EXPR)
2175 p1 = -p1;
2176 *mul = wi::sext (p0 + p1, precision);
2177 return true;
2179 case INTEGER_CST:
2180 if (TREE_CODE (bot) != INTEGER_CST)
2181 return false;
2183 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2184 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2185 if (p1 == 0)
2186 return false;
2187 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2188 return res == 0;
2190 default:
2191 if (POLY_INT_CST_P (top)
2192 && POLY_INT_CST_P (bot)
2193 && constant_multiple_p (wi::to_poly_widest (top),
2194 wi::to_poly_widest (bot), mul))
2195 return true;
2197 return false;
2201 /* Return true if memory reference REF with step STEP may be unaligned. */
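/* For example, a naturally aligned 32-bit integer reference whose step is
   2 bytes may become misaligned after an iteration, so the trailing-zeros
   check on STEP below makes this function return true for it.  */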
2203 static bool
2204 may_be_unaligned_p (tree ref, tree step)
2206 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2207 thus they are not misaligned. */
2208 if (TREE_CODE (ref) == TARGET_MEM_REF)
2209 return false;
2211 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2212 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2213 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2215 unsigned HOST_WIDE_INT bitpos;
2216 unsigned int ref_align;
2217 get_object_alignment_1 (ref, &ref_align, &bitpos);
2218 if (ref_align < align
2219 || (bitpos % align) != 0
2220 || (bitpos % BITS_PER_UNIT) != 0)
2221 return true;
2223 unsigned int trailing_zeros = tree_ctz (step);
2224 if (trailing_zeros < HOST_BITS_PER_INT
2225 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2226 return true;
2228 return false;
2231 /* Return true if EXPR may be non-addressable. */
2233 bool
2234 may_be_nonaddressable_p (tree expr)
2236 switch (TREE_CODE (expr))
2238 case VAR_DECL:
2239 /* Check if it's a register variable. */
2240 return DECL_HARD_REGISTER (expr);
2242 case TARGET_MEM_REF:
2243 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2244 target, thus they are always addressable. */
2245 return false;
2247 case MEM_REF:
2248 /* Likewise for MEM_REFs, modulo the storage order. */
2249 return REF_REVERSE_STORAGE_ORDER (expr);
2251 case BIT_FIELD_REF:
2252 if (REF_REVERSE_STORAGE_ORDER (expr))
2253 return true;
2254 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2256 case COMPONENT_REF:
2257 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2258 return true;
2259 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2260 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2262 case ARRAY_REF:
2263 case ARRAY_RANGE_REF:
2264 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2265 return true;
2266 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2268 case VIEW_CONVERT_EXPR:
2269 /* This kind of view-conversion may wrap non-addressable objects
2270 and make them look addressable. After some processing the
2271 non-addressability may be uncovered again, causing ADDR_EXPRs
2272 of inappropriate objects to be built. */
2273 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2274 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2275 return true;
2276 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2278 CASE_CONVERT:
2279 return true;
2281 default:
2282 break;
2285 return false;
2288 /* Finds addresses in *OP_P inside STMT. */
2290 static void
2291 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2292 tree *op_p)
2294 tree base = *op_p, step = size_zero_node;
2295 struct iv *civ;
2296 struct ifs_ivopts_data ifs_ivopts_data;
2298 /* Do not play with volatile memory references. A bit too conservative,
2299 perhaps, but safe. */
2300 if (gimple_has_volatile_ops (stmt))
2301 goto fail;
2303 /* Ignore bitfields for now. Not really something terribly complicated
2304 to handle. TODO. */
2305 if (TREE_CODE (base) == BIT_FIELD_REF)
2306 goto fail;
2308 base = unshare_expr (base);
2310 if (TREE_CODE (base) == TARGET_MEM_REF)
2312 tree type = build_pointer_type (TREE_TYPE (base));
2313 tree astep;
2315 if (TMR_BASE (base)
2316 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2318 civ = get_iv (data, TMR_BASE (base));
2319 if (!civ)
2320 goto fail;
2322 TMR_BASE (base) = civ->base;
2323 step = civ->step;
2325 if (TMR_INDEX2 (base)
2326 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2328 civ = get_iv (data, TMR_INDEX2 (base));
2329 if (!civ)
2330 goto fail;
2332 TMR_INDEX2 (base) = civ->base;
2333 step = civ->step;
2335 if (TMR_INDEX (base)
2336 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2338 civ = get_iv (data, TMR_INDEX (base));
2339 if (!civ)
2340 goto fail;
2342 TMR_INDEX (base) = civ->base;
2343 astep = civ->step;
2345 if (astep)
2347 if (TMR_STEP (base))
2348 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2350 step = fold_build2 (PLUS_EXPR, type, step, astep);
2354 if (integer_zerop (step))
2355 goto fail;
2356 base = tree_mem_ref_addr (type, base);
2358 else
2360 ifs_ivopts_data.ivopts_data = data;
2361 ifs_ivopts_data.stmt = stmt;
2362 ifs_ivopts_data.step = size_zero_node;
2363 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2364 || integer_zerop (ifs_ivopts_data.step))
2365 goto fail;
2366 step = ifs_ivopts_data.step;
2368 /* Check that the base expression is addressable. This needs
2369 to be done after substituting bases of IVs into it. */
2370 if (may_be_nonaddressable_p (base))
2371 goto fail;
2373 /* Moreover, on strict alignment platforms, check that it is
2374 sufficiently aligned. */
2375 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2376 goto fail;
2378 base = build_fold_addr_expr (base);
2380 /* Substituting bases of IVs into the base expression might
2381 have caused folding opportunities. */
2382 if (TREE_CODE (base) == ADDR_EXPR)
2384 tree *ref = &TREE_OPERAND (base, 0);
2385 while (handled_component_p (*ref))
2386 ref = &TREE_OPERAND (*ref, 0);
2387 if (TREE_CODE (*ref) == MEM_REF)
2389 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2390 TREE_OPERAND (*ref, 0),
2391 TREE_OPERAND (*ref, 1));
2392 if (tem)
2393 *ref = tem;
2398 civ = alloc_iv (data, base, step);
2399 /* Fail if base object of this memory reference is unknown. */
2400 if (civ->base_object == NULL_TREE)
2401 goto fail;
2403 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2404 return;
2406 fail:
2407 for_each_index (op_p, idx_record_use, data);
2410 /* Finds and records invariants used in STMT. */
2412 static void
2413 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2415 ssa_op_iter iter;
2416 use_operand_p use_p;
2417 tree op;
2419 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2421 op = USE_FROM_PTR (use_p);
2422 record_invariant (data, op, false);
2426 /* CALL calls an internal function. If operand *OP_P will become an
2427 address when the call is expanded, return the type of the memory
2428 being addressed, otherwise return null. */
2430 static tree
2431 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2433 switch (gimple_call_internal_fn (call))
2435 case IFN_MASK_LOAD:
2436 case IFN_MASK_LOAD_LANES:
2437 case IFN_LEN_LOAD:
2438 if (op_p == gimple_call_arg_ptr (call, 0))
2439 return TREE_TYPE (gimple_call_lhs (call));
2440 return NULL_TREE;
2442 case IFN_MASK_STORE:
2443 case IFN_MASK_STORE_LANES:
2444 case IFN_LEN_STORE:
2445 if (op_p == gimple_call_arg_ptr (call, 0))
2446 return TREE_TYPE (gimple_call_arg (call, 3));
2447 return NULL_TREE;
2449 default:
2450 return NULL_TREE;
2454 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2455 Return true if the operand will become an address when STMT
2456 is expanded and record the associated address use if so. */
2458 static bool
2459 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2460 struct iv *iv)
2462 /* Fail if base object of this memory reference is unknown. */
2463 if (iv->base_object == NULL_TREE)
2464 return false;
2466 tree mem_type = NULL_TREE;
2467 if (gcall *call = dyn_cast <gcall *> (stmt))
2468 if (gimple_call_internal_p (call))
2469 mem_type = get_mem_type_for_internal_fn (call, op_p);
2470 if (mem_type)
2472 iv = alloc_iv (data, iv->base, iv->step);
2473 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2474 return true;
2476 return false;
2479 /* Finds interesting uses of induction variables in the statement STMT. */
2481 static void
2482 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2484 struct iv *iv;
2485 tree op, *lhs, *rhs;
2486 ssa_op_iter iter;
2487 use_operand_p use_p;
2488 enum tree_code code;
2490 find_invariants_stmt (data, stmt);
2492 if (gimple_code (stmt) == GIMPLE_COND)
2494 find_interesting_uses_cond (data, stmt);
2495 return;
2498 if (is_gimple_assign (stmt))
2500 lhs = gimple_assign_lhs_ptr (stmt);
2501 rhs = gimple_assign_rhs1_ptr (stmt);
2503 if (TREE_CODE (*lhs) == SSA_NAME)
2505 /* If the statement defines an induction variable, the uses are not
2506 interesting by themselves. */
2508 iv = get_iv (data, *lhs);
2510 if (iv && !integer_zerop (iv->step))
2511 return;
2514 code = gimple_assign_rhs_code (stmt);
2515 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2516 && (REFERENCE_CLASS_P (*rhs)
2517 || is_gimple_val (*rhs)))
2519 if (REFERENCE_CLASS_P (*rhs))
2520 find_interesting_uses_address (data, stmt, rhs);
2521 else
2522 find_interesting_uses_op (data, *rhs);
2524 if (REFERENCE_CLASS_P (*lhs))
2525 find_interesting_uses_address (data, stmt, lhs);
2526 return;
2528 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2530 find_interesting_uses_cond (data, stmt);
2531 return;
2534 /* TODO -- we should also handle address uses of type
2536 memory = call (whatever);
2540 call (memory). */
2543 if (gimple_code (stmt) == GIMPLE_PHI
2544 && gimple_bb (stmt) == data->current_loop->header)
2546 iv = get_iv (data, PHI_RESULT (stmt));
2548 if (iv && !integer_zerop (iv->step))
2549 return;
2552 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2554 op = USE_FROM_PTR (use_p);
2556 if (TREE_CODE (op) != SSA_NAME)
2557 continue;
2559 iv = get_iv (data, op);
2560 if (!iv)
2561 continue;
2563 if (!find_address_like_use (data, stmt, use_p->use, iv))
2564 find_interesting_uses_op (data, op);
2568 /* Finds interesting uses of induction variables outside of loops
2569 on loop exit edge EXIT. */
2571 static void
2572 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2574 gphi *phi;
2575 gphi_iterator psi;
2576 tree def;
2578 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2580 phi = psi.phi ();
2581 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2582 if (!virtual_operand_p (def))
2583 find_interesting_uses_op (data, def);
2587 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2588 mode for the memory reference represented by USE. */
2590 static GTY (()) vec<rtx, va_gc> *addr_list;
2592 static bool
2593 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2595 rtx reg, addr;
2596 unsigned list_index;
2597 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2598 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2600 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2601 if (list_index >= vec_safe_length (addr_list))
2602 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2604 addr = (*addr_list)[list_index];
2605 if (!addr)
2607 addr_mode = targetm.addr_space.address_mode (as);
2608 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2609 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2610 (*addr_list)[list_index] = addr;
2612 else
2613 addr_mode = GET_MODE (addr);
2615 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2616 return (memory_address_addr_space_p (mem_mode, addr, as));
2619 /* Comparison function to sort group in ascending order of addr_offset. */
2621 static int
2622 group_compare_offset (const void *a, const void *b)
2624 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2625 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2627 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2630 /* Check if small groups should be split. Return true if no group
2631 contains more than two uses with distinct addr_offsets. Return
2632 false otherwise. We want to split such groups because:
2634 1) Small groups don't have much benefit and may interfere with
2635 general candidate selection.
2636 2) The problem size with only small groups is usually small and the
2637 general algorithm can handle it well.
2639 TODO -- The above claim may not hold when we want to merge memory
2640 accesses with consecutive addresses. */
2642 static bool
2643 split_small_address_groups_p (struct ivopts_data *data)
2645 unsigned int i, j, distinct = 1;
2646 struct iv_use *pre;
2647 struct iv_group *group;
2649 for (i = 0; i < data->vgroups.length (); i++)
2651 group = data->vgroups[i];
2652 if (group->vuses.length () == 1)
2653 continue;
2655 gcc_assert (address_p (group->type));
2656 if (group->vuses.length () == 2)
2658 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2659 group->vuses[1]->addr_offset) > 0)
2660 std::swap (group->vuses[0], group->vuses[1]);
2662 else
2663 group->vuses.qsort (group_compare_offset);
2665 if (distinct > 2)
2666 continue;
2668 distinct = 1;
2669 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2671 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2673 pre = group->vuses[j];
2674 distinct++;
2677 if (distinct > 2)
2678 break;
2682 return (distinct <= 2);
2685 /* For each group of address type uses, this function further groups
2686 these uses according to the maximum offset supported by target's
2687 [base + offset] addressing mode. */
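/* As a sketch: if a group contains uses at offsets 0, 8 and 4104 from the
   same base and the target's [base + offset] form only reaches offsets up
   to 4095 (a hypothetical bound; the real one is whatever
   addr_offset_valid_p reports), the use at offset 4104 is moved into a new
   group while the first two stay together.  */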
2689 static void
2690 split_address_groups (struct ivopts_data *data)
2692 unsigned int i, j;
2693 /* Whether to split every group; true when all address groups are small. */
2694 bool split_p = split_small_address_groups_p (data);
2696 for (i = 0; i < data->vgroups.length (); i++)
2698 struct iv_group *new_group = NULL;
2699 struct iv_group *group = data->vgroups[i];
2700 struct iv_use *use = group->vuses[0];
2702 use->id = 0;
2703 use->group_id = group->id;
2704 if (group->vuses.length () == 1)
2705 continue;
2707 gcc_assert (address_p (use->type));
2709 for (j = 1; j < group->vuses.length ();)
2711 struct iv_use *next = group->vuses[j];
2712 poly_int64 offset = next->addr_offset - use->addr_offset;
2714 /* Split the group if asked to, or if the offset against the first
2715 use can't fit in the offset part of the addressing mode. IV uses
2716 having the same offset are still kept in one group. */
2717 if (maybe_ne (offset, 0)
2718 && (split_p || !addr_offset_valid_p (use, offset)))
2720 if (!new_group)
2721 new_group = record_group (data, group->type);
2722 group->vuses.ordered_remove (j);
2723 new_group->vuses.safe_push (next);
2724 continue;
2727 next->id = j;
2728 next->group_id = group->id;
2729 j++;
2734 /* Finds uses of the induction variables that are interesting. */
2736 static void
2737 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2739 basic_block bb;
2740 gimple_stmt_iterator bsi;
2741 unsigned i;
2742 edge e;
2744 for (i = 0; i < data->current_loop->num_nodes; i++)
2746 edge_iterator ei;
2747 bb = body[i];
2749 FOR_EACH_EDGE (e, ei, bb->succs)
2750 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2751 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2752 find_interesting_uses_outside (data, e);
2754 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2755 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2756 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2757 if (!is_gimple_debug (gsi_stmt (bsi)))
2758 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2761 split_address_groups (data);
2763 if (dump_file && (dump_flags & TDF_DETAILS))
2765 fprintf (dump_file, "\n<IV Groups>:\n");
2766 dump_groups (dump_file, data);
2767 fprintf (dump_file, "\n");
2771 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2772 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2773 we are at the top-level of the processed address. */
2775 static tree
2776 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2777 poly_int64 *offset)
2779 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2780 enum tree_code code;
2781 tree type, orig_type = TREE_TYPE (expr);
2782 poly_int64 off0, off1;
2783 HOST_WIDE_INT st;
2784 tree orig_expr = expr;
2786 STRIP_NOPS (expr);
2788 type = TREE_TYPE (expr);
2789 code = TREE_CODE (expr);
2790 *offset = 0;
2792 switch (code)
2794 case POINTER_PLUS_EXPR:
2795 case PLUS_EXPR:
2796 case MINUS_EXPR:
2797 op0 = TREE_OPERAND (expr, 0);
2798 op1 = TREE_OPERAND (expr, 1);
2800 op0 = strip_offset_1 (op0, false, false, &off0);
2801 op1 = strip_offset_1 (op1, false, false, &off1);
2803 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2804 if (op0 == TREE_OPERAND (expr, 0)
2805 && op1 == TREE_OPERAND (expr, 1))
2806 return orig_expr;
2808 if (integer_zerop (op1))
2809 expr = op0;
2810 else if (integer_zerop (op0))
2812 if (code == MINUS_EXPR)
2813 expr = fold_build1 (NEGATE_EXPR, type, op1);
2814 else
2815 expr = op1;
2817 else
2818 expr = fold_build2 (code, type, op0, op1);
2820 return fold_convert (orig_type, expr);
2822 case MULT_EXPR:
2823 op1 = TREE_OPERAND (expr, 1);
2824 if (!cst_and_fits_in_hwi (op1))
2825 return orig_expr;
2827 op0 = TREE_OPERAND (expr, 0);
2828 op0 = strip_offset_1 (op0, false, false, &off0);
2829 if (op0 == TREE_OPERAND (expr, 0))
2830 return orig_expr;
2832 *offset = off0 * int_cst_value (op1);
2833 if (integer_zerop (op0))
2834 expr = op0;
2835 else
2836 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2838 return fold_convert (orig_type, expr);
2840 case ARRAY_REF:
2841 case ARRAY_RANGE_REF:
2842 if (!inside_addr)
2843 return orig_expr;
2845 step = array_ref_element_size (expr);
2846 if (!cst_and_fits_in_hwi (step))
2847 break;
2849 st = int_cst_value (step);
2850 op1 = TREE_OPERAND (expr, 1);
2851 op1 = strip_offset_1 (op1, false, false, &off1);
2852 *offset = off1 * st;
2854 if (top_compref
2855 && integer_zerop (op1))
2857 /* Strip the component reference completely. */
2858 op0 = TREE_OPERAND (expr, 0);
2859 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2860 *offset += off0;
2861 return op0;
2863 break;
2865 case COMPONENT_REF:
2867 tree field;
2869 if (!inside_addr)
2870 return orig_expr;
2872 tmp = component_ref_field_offset (expr);
2873 field = TREE_OPERAND (expr, 1);
2874 if (top_compref
2875 && cst_and_fits_in_hwi (tmp)
2876 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2878 HOST_WIDE_INT boffset, abs_off;
2880 /* Strip the component reference completely. */
2881 op0 = TREE_OPERAND (expr, 0);
2882 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2883 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2884 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2885 if (boffset < 0)
2886 abs_off = -abs_off;
2888 *offset = off0 + int_cst_value (tmp) + abs_off;
2889 return op0;
2892 break;
2894 case ADDR_EXPR:
2895 op0 = TREE_OPERAND (expr, 0);
2896 op0 = strip_offset_1 (op0, true, true, &off0);
2897 *offset += off0;
2899 if (op0 == TREE_OPERAND (expr, 0))
2900 return orig_expr;
2902 expr = build_fold_addr_expr (op0);
2903 return fold_convert (orig_type, expr);
2905 case MEM_REF:
2906 /* ??? Offset operand? */
2907 inside_addr = false;
2908 break;
2910 default:
2911 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2912 return build_int_cst (orig_type, 0);
2913 return orig_expr;
2916 /* Default handling of expressions for which we want to recurse into
2917 the first operand. */
2918 op0 = TREE_OPERAND (expr, 0);
2919 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2920 *offset += off0;
2922 if (op0 == TREE_OPERAND (expr, 0)
2923 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2924 return orig_expr;
2926 expr = copy_node (expr);
2927 TREE_OPERAND (expr, 0) = op0;
2928 if (op1)
2929 TREE_OPERAND (expr, 1) = op1;
2931 /* Inside an address, we might strip the top-level component references,
2932 thus changing the type of the expression. Handling of ADDR_EXPR
2933 will fix that. */
2934 expr = fold_convert (orig_type, expr);
2936 return expr;
2939 /* Strips constant offsets from EXPR and stores them to OFFSET. */
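/* For example, for EXPR = p + 16 (a POINTER_PLUS_EXPR with a constant second
   operand) this returns p and stores 16 to OFFSET.  */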
2941 tree
2942 strip_offset (tree expr, poly_uint64_pod *offset)
2944 poly_int64 off;
2945 tree core = strip_offset_1 (expr, false, false, &off);
2946 *offset = off;
2947 return core;
2950 /* Returns variant of TYPE that can be used as base for different uses.
2951 We return unsigned type with the same precision, which avoids problems
2952 with overflows. */
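/* E.g. int is mapped to unsigned int, and a pointer type is likewise mapped
   to an unsigned integer type of the same precision.  */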
2954 static tree
2955 generic_type_for (tree type)
2957 if (POINTER_TYPE_P (type))
2958 return unsigned_type_for (type);
2960 if (TYPE_UNSIGNED (type))
2961 return type;
2963 return unsigned_type_for (type);
2966 /* Private data for walk_tree. */
2968 struct walk_tree_data
2970 bitmap *inv_vars;
2971 struct ivopts_data *idata;
2974 /* Callback function for walk_tree; it records invariants and symbol
2975 references in *EXPR_P. DATA is the structure storing result info. */
2977 static tree
2978 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2980 tree op = *expr_p;
2981 struct version_info *info;
2982 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2984 if (TREE_CODE (op) != SSA_NAME)
2985 return NULL_TREE;
2987 info = name_info (wdata->idata, op);
2988 /* Because we expand simple operations when finding IVs, a loop-invariant
2989 variable that isn't referred to by the original loop could be used now.
2990 Record such invariant variables here. */
2991 if (!info->iv)
2993 struct ivopts_data *idata = wdata->idata;
2994 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2996 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2998 tree steptype = TREE_TYPE (op);
2999 if (POINTER_TYPE_P (steptype))
3000 steptype = sizetype;
3001 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3002 record_invariant (idata, op, false);
3005 if (!info->inv_id || info->has_nonlin_use)
3006 return NULL_TREE;
3008 if (!*wdata->inv_vars)
3009 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3010 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3012 return NULL_TREE;
3015 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3016 store them. */
3018 static inline void
3019 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3021 struct walk_tree_data wdata;
3023 if (!inv_vars)
3024 return;
3026 wdata.idata = data;
3027 wdata.inv_vars = inv_vars;
3028 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3031 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
3032 entry is recorded if it doesn't exist yet. Given the two exprs below:
3033 inv_expr + cst1, inv_expr + cst2
3034 it's hard to decide whether the constant part should be stripped
3035 or not. We choose not to strip it, based on the following facts:
3036 1) We need to count the ADD cost for the constant part if it's stripped,
3037 which isn't always trivial where this function is called.
3038 2) Stripping the constant away may conflict with the subsequent loop
3039 invariant hoisting pass.
3040 3) Not stripping the constant away results in more invariant exprs,
3041 which usually leads to decisions preferring lower register pressure. */
3043 static iv_inv_expr_ent *
3044 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3046 STRIP_NOPS (inv_expr);
3048 if (poly_int_tree_p (inv_expr)
3049 || TREE_CODE (inv_expr) == SSA_NAME)
3050 return NULL;
3052 /* Don't strip constant part away as we used to. */
3054 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3055 struct iv_inv_expr_ent ent;
3056 ent.expr = inv_expr;
3057 ent.hash = iterative_hash_expr (inv_expr, 0);
3058 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3060 if (!*slot)
3062 *slot = XNEW (struct iv_inv_expr_ent);
3063 (*slot)->expr = inv_expr;
3064 (*slot)->hash = ent.hash;
3065 (*slot)->id = ++data->max_inv_expr_id;
3068 return *slot;
3071 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3072 position to POS. If USE is not NULL, the candidate is set as related to
3073 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3074 replacement of the final value of the iv by a direct computation. */
3076 static struct iv_cand *
3077 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3078 enum iv_position pos, struct iv_use *use,
3079 gimple *incremented_at, struct iv *orig_iv = NULL,
3080 bool doloop = false)
3082 unsigned i;
3083 struct iv_cand *cand = NULL;
3084 tree type, orig_type;
3086 gcc_assert (base && step);
3088 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3089 live, but the ivopts code may replace a real pointer with one
3090 pointing before or after the memory block that is then adjusted
3091 into the memory block during the loop. FIXME: It would likely be
3092 better to actually force the pointer live and still use ivopts;
3093 for example, it would be enough to write the pointer into memory
3094 and keep it there until after the loop. */
3095 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3096 return NULL;
3098 /* For non-original variables, make sure their values are computed in a type
3099 that does not invoke undefined behavior on overflows (since in general,
3100 we cannot prove that these induction variables are non-wrapping). */
3101 if (pos != IP_ORIGINAL)
3103 orig_type = TREE_TYPE (base);
3104 type = generic_type_for (orig_type);
3105 if (type != orig_type)
3107 base = fold_convert (type, base);
3108 step = fold_convert (type, step);
3112 for (i = 0; i < data->vcands.length (); i++)
3114 cand = data->vcands[i];
3116 if (cand->pos != pos)
3117 continue;
3119 if (cand->incremented_at != incremented_at
3120 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3121 && cand->ainc_use != use))
3122 continue;
3124 if (operand_equal_p (base, cand->iv->base, 0)
3125 && operand_equal_p (step, cand->iv->step, 0)
3126 && (TYPE_PRECISION (TREE_TYPE (base))
3127 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3128 break;
3131 if (i == data->vcands.length ())
3133 cand = XCNEW (struct iv_cand);
3134 cand->id = i;
3135 cand->iv = alloc_iv (data, base, step);
3136 cand->pos = pos;
3137 if (pos != IP_ORIGINAL)
3139 if (doloop)
3140 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3141 else
3142 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3143 cand->var_after = cand->var_before;
3145 cand->important = important;
3146 cand->incremented_at = incremented_at;
3147 cand->doloop_p = doloop;
3148 data->vcands.safe_push (cand);
3150 if (!poly_int_tree_p (step))
3152 find_inv_vars (data, &step, &cand->inv_vars);
3154 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3155 /* Share bitmap between inv_vars and inv_exprs for cand. */
3156 if (inv_expr != NULL)
3158 cand->inv_exprs = cand->inv_vars;
3159 cand->inv_vars = NULL;
3160 if (cand->inv_exprs)
3161 bitmap_clear (cand->inv_exprs);
3162 else
3163 cand->inv_exprs = BITMAP_ALLOC (NULL);
3165 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3170 cand->ainc_use = use;
3171 else
3172 cand->ainc_use = NULL;
3174 cand->orig_iv = orig_iv;
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 dump_cand (dump_file, cand);
3179 cand->important |= important;
3180 cand->doloop_p |= doloop;
3182 /* Relate candidate to the group for which it is added. */
3183 if (use)
3184 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3186 return cand;
3189 /* Returns true if incrementing the induction variable at the end of the LOOP
3190 is allowed.
3192 The purpose is to avoid splitting the latch edge with a biv increment, thus
3193 creating a jump, possibly confusing other optimization passes and leaving
3194 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3195 available (so we do not have a better alternative), or if the latch edge
3196 is already nonempty. */
3198 static bool
3199 allow_ip_end_pos_p (class loop *loop)
3201 if (!ip_normal_pos (loop))
3202 return true;
3204 if (!empty_block_p (ip_end_pos (loop)))
3205 return true;
3207 return false;
3210 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3211 Important field is set to IMPORTANT. */
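/* For instance, for an address use *p where the candidate step equals the
   size of the accessed mode (say 4 bytes) and the target supports
   post-increment addressing for that mode, an IP_AFTER_USE candidate is
   added so the increment can be combined with the memory access itself.
   Which pre/post increment/decrement forms exist is target-specific.  */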
3213 static void
3214 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3215 bool important, struct iv_use *use)
3217 basic_block use_bb = gimple_bb (use->stmt);
3218 machine_mode mem_mode;
3219 unsigned HOST_WIDE_INT cstepi;
3221 /* If we insert the increment in any position other than the standard
3222 ones, we must ensure that it is incremented once per iteration.
3223 It must not be in an inner nested loop, or one side of an if
3224 statement. */
3225 if (use_bb->loop_father != data->current_loop
3226 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3227 || stmt_can_throw_internal (cfun, use->stmt)
3228 || !cst_and_fits_in_hwi (step))
3229 return;
3231 cstepi = int_cst_value (step);
3233 mem_mode = TYPE_MODE (use->mem_type);
3234 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3235 || USE_STORE_PRE_INCREMENT (mem_mode))
3236 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3237 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3238 || USE_STORE_PRE_DECREMENT (mem_mode))
3239 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3241 enum tree_code code = MINUS_EXPR;
3242 tree new_base;
3243 tree new_step = step;
3245 if (POINTER_TYPE_P (TREE_TYPE (base)))
3247 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3248 code = POINTER_PLUS_EXPR;
3250 else
3251 new_step = fold_convert (TREE_TYPE (base), new_step);
3252 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3253 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3254 use->stmt);
3256 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3257 || USE_STORE_POST_INCREMENT (mem_mode))
3258 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3259 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3260 || USE_STORE_POST_DECREMENT (mem_mode))
3261 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3263 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3264 use->stmt);
3268 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3269 position to POS. If USE is not NULL, the candidate is set as related to
3270 it. The candidate computation is scheduled before exit condition and at
3271 the end of loop. */
3273 static void
3274 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3275 struct iv_use *use, struct iv *orig_iv = NULL,
3276 bool doloop = false)
3278 if (ip_normal_pos (data->current_loop))
3279 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3280 doloop);
3281 /* Exclude the doloop candidate here since it requires decrement then comparison
3282 and jump, so the IP_END position doesn't match. */
3283 if (!doloop && ip_end_pos (data->current_loop)
3284 && allow_ip_end_pos_p (data->current_loop))
3285 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3288 /* Adds standard iv candidates. */
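/* On a typical LP64 target (an assumption, not something checked here) this
   adds the candidates {0, +1} in unsigned int and in unsigned long; the
   long long variant is skipped there because it has the same precision as
   long.  */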
3290 static void
3291 add_standard_iv_candidates (struct ivopts_data *data)
3293 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3295 /* The same for a double-integer type if it is still fast enough. */
3296 if (TYPE_PRECISION
3297 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3298 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3299 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3300 build_int_cst (long_integer_type_node, 1), true, NULL);
3302 /* The same for a double-integer type if it is still fast enough. */
3303 if (TYPE_PRECISION
3304 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3305 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3306 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3307 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3311 /* Adds candidates based on the old induction variable IV. */
3313 static void
3314 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3316 gimple *phi;
3317 tree def;
3318 struct iv_cand *cand;
3320 /* Check if this biv is used in address type use. */
3321 if (iv->no_overflow && iv->have_address_use
3322 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3323 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3325 tree base = fold_convert (sizetype, iv->base);
3326 tree step = fold_convert (sizetype, iv->step);
3328 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3329 add_candidate (data, base, step, true, NULL, iv);
3330 /* Add iv cand of the original type only if it has nonlinear use. */
3331 if (iv->nonlin_use)
3332 add_candidate (data, iv->base, iv->step, true, NULL);
3334 else
3335 add_candidate (data, iv->base, iv->step, true, NULL);
3337 /* The same, but with initial value zero. */
3338 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3339 add_candidate (data, size_int (0), iv->step, true, NULL);
3340 else
3341 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3342 iv->step, true, NULL);
3344 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3345 if (gimple_code (phi) == GIMPLE_PHI)
3347 /* Additionally record the possibility of leaving the original iv
3348 untouched. */
3349 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3350 /* Don't add candidate if it's from another PHI node because
3351 it's an affine iv appearing in the form of PEELED_CHREC. */
3352 phi = SSA_NAME_DEF_STMT (def);
3353 if (gimple_code (phi) != GIMPLE_PHI)
3355 cand = add_candidate_1 (data,
3356 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3357 SSA_NAME_DEF_STMT (def));
3358 if (cand)
3360 cand->var_before = iv->ssa_name;
3361 cand->var_after = def;
3364 else
3365 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3369 /* Adds candidates based on the old induction variables. */
3371 static void
3372 add_iv_candidate_for_bivs (struct ivopts_data *data)
3374 unsigned i;
3375 struct iv *iv;
3376 bitmap_iterator bi;
3378 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3380 iv = ver_info (data, i)->iv;
3381 if (iv && iv->biv_p && !integer_zerop (iv->step))
3382 add_iv_candidate_for_biv (data, iv);
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3388 static void
3389 record_common_cand (struct ivopts_data *data, tree base,
3390 tree step, struct iv_use *use)
3392 class iv_common_cand ent;
3393 class iv_common_cand **slot;
3395 ent.base = base;
3396 ent.step = step;
3397 ent.hash = iterative_hash_expr (base, 0);
3398 ent.hash = iterative_hash_expr (step, ent.hash);
3400 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3401 if (*slot == NULL)
3403 *slot = new iv_common_cand ();
3404 (*slot)->base = base;
3405 (*slot)->step = step;
3406 (*slot)->uses.create (8);
3407 (*slot)->hash = ent.hash;
3408 data->iv_common_cands.safe_push ((*slot));
3411 gcc_assert (use != NULL);
3412 (*slot)->uses.safe_push (use);
3413 return;
3416 /* Comparison function used to sort common candidates. */
3418 static int
3419 common_cand_cmp (const void *p1, const void *p2)
3421 unsigned n1, n2;
3422 const class iv_common_cand *const *const ccand1
3423 = (const class iv_common_cand *const *)p1;
3424 const class iv_common_cand *const *const ccand2
3425 = (const class iv_common_cand *const *)p2;
3427 n1 = (*ccand1)->uses.length ();
3428 n2 = (*ccand2)->uses.length ();
3429 return n2 - n1;
3432 /* Adds IV candidates based on the common candidates recorded. */
3434 static void
3435 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3437 unsigned i, j;
3438 struct iv_cand *cand_1, *cand_2;
3440 data->iv_common_cands.qsort (common_cand_cmp);
3441 for (i = 0; i < data->iv_common_cands.length (); i++)
3443 class iv_common_cand *ptr = data->iv_common_cands[i];
3445 /* Only add IV candidate if it's derived from multiple uses. */
3446 if (ptr->uses.length () <= 1)
3447 break;
3449 cand_1 = NULL;
3450 cand_2 = NULL;
3451 if (ip_normal_pos (data->current_loop))
3452 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3453 false, IP_NORMAL, NULL, NULL);
3455 if (ip_end_pos (data->current_loop)
3456 && allow_ip_end_pos_p (data->current_loop))
3457 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3458 false, IP_END, NULL, NULL);
3460 /* Bind deriving uses and the new candidates. */
3461 for (j = 0; j < ptr->uses.length (); j++)
3463 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3464 if (cand_1)
3465 bitmap_set_bit (group->related_cands, cand_1->id);
3466 if (cand_2)
3467 bitmap_set_bit (group->related_cands, cand_2->id);
3471 /* Release data since it is useless from this point. */
3472 data->iv_common_cand_tab->empty ();
3473 data->iv_common_cands.truncate (0);
3476 /* Adds candidates based on the value of USE's iv. */
3478 static void
3479 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3481 poly_uint64 offset;
3482 tree base;
3483 struct iv *iv = use->iv;
3484 tree basetype = TREE_TYPE (iv->base);
3486 /* Don't add a candidate for an iv_use whose type is neither integer nor
3487 pointer, or doesn't have mode precision; instead, add a candidate for the
3488 corresponding scev in an unsigned type of the same precision. See PR93674 for more info. */
3489 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3490 || !type_has_mode_precision_p (basetype))
3492 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3493 TYPE_UNSIGNED (basetype));
3494 add_candidate (data, fold_convert (basetype, iv->base),
3495 fold_convert (basetype, iv->step), false, NULL);
3496 return;
3499 add_candidate (data, iv->base, iv->step, false, use);
3501 /* Record common candidate for use in case it can be shared by others. */
3502 record_common_cand (data, iv->base, iv->step, use);
3504 /* Record common candidate with initial value zero. */
3505 basetype = TREE_TYPE (iv->base);
3506 if (POINTER_TYPE_P (basetype))
3507 basetype = sizetype;
3508 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3510 /* Compare the cost of an address with an unscaled index with the cost of
3511 an address with a scaled index and add candidate if useful. */
3512 poly_int64 step;
3513 if (use != NULL
3514 && poly_int_tree_p (iv->step, &step)
3515 && address_p (use->type))
3517 poly_int64 new_step;
3518 unsigned int fact = preferred_mem_scale_factor
3519 (use->iv->base,
3520 TYPE_MODE (use->mem_type),
3521 optimize_loop_for_speed_p (data->current_loop));
3523 if (fact != 1
3524 && multiple_p (step, fact, &new_step))
3525 add_candidate (data, size_int (0),
3526 wide_int_to_tree (sizetype, new_step),
3527 true, NULL);
3530 /* Record a common candidate with the constant offset stripped from the base.
3531 Like for the use itself, we also add a candidate directly for it. */
3532 base = strip_offset (iv->base, &offset);
3533 if (maybe_ne (offset, 0U) || base != iv->base)
3535 record_common_cand (data, base, iv->step, use);
3536 add_candidate (data, base, iv->step, false, use);
3539 /* Record common candidate with base_object removed in base. */
3540 base = iv->base;
3541 STRIP_NOPS (base);
3542 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3544 tree step = iv->step;
3546 STRIP_NOPS (step);
3547 base = TREE_OPERAND (base, 1);
3548 step = fold_convert (sizetype, step);
3549 record_common_cand (data, base, step, use);
3550 /* Also record common candidate with offset stripped. */
3551 base = strip_offset (base, &offset);
3552 if (maybe_ne (offset, 0U))
3553 record_common_cand (data, base, step, use);
3556 /* Finally, add auto-increment candidates. Make such variables
3557 important since other iv uses with the same base object may be based
3558 on them. */
3559 if (use != NULL && address_p (use->type))
3560 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3563 /* Adds candidates based on the uses. */
3565 static void
3566 add_iv_candidate_for_groups (struct ivopts_data *data)
3568 unsigned i;
3570 /* Only add a candidate for the first use in each group. */
3571 for (i = 0; i < data->vgroups.length (); i++)
3573 struct iv_group *group = data->vgroups[i];
3575 gcc_assert (group->vuses[0] != NULL);
3576 add_iv_candidate_for_use (data, group->vuses[0]);
3578 add_iv_candidate_derived_from_uses (data);
3581 /* Record important candidates and add them to related_cands bitmaps. */
3583 static void
3584 record_important_candidates (struct ivopts_data *data)
3586 unsigned i;
3587 struct iv_group *group;
3589 for (i = 0; i < data->vcands.length (); i++)
3591 struct iv_cand *cand = data->vcands[i];
3593 if (cand->important)
3594 bitmap_set_bit (data->important_candidates, i);
3597 data->consider_all_candidates = (data->vcands.length ()
3598 <= CONSIDER_ALL_CANDIDATES_BOUND);
3600 /* Add important candidates to groups' related_cands bitmaps. */
3601 for (i = 0; i < data->vgroups.length (); i++)
3603 group = data->vgroups[i];
3604 bitmap_ior_into (group->related_cands, data->important_candidates);
3608 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3609 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3610 we allocate a smaller map for every group. */
3612 static void
3613 alloc_use_cost_map (struct ivopts_data *data)
3615 unsigned i, size, s;
3617 for (i = 0; i < data->vgroups.length (); i++)
3619 struct iv_group *group = data->vgroups[i];
3621 if (data->consider_all_candidates)
3622 size = data->vcands.length ();
3623 else
3625 s = bitmap_count_bits (group->related_cands);
3627 /* Round up to a power of two, so that computing the modulus is fast. */
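/* E.g. 5 related candidates round up to a map of size 8.  */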
3628 size = s ? (1 << ceil_log2 (s)) : 1;
3631 group->n_map_members = size;
3632 group->cost_map = XCNEWVEC (class cost_pair, size);
3636 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3637 on invariants INV_VARS, that the value used in expressing it is
3638 VALUE, and that in case of iv elimination the comparison operator is COMP. */
3640 static void
3641 set_group_iv_cost (struct ivopts_data *data,
3642 struct iv_group *group, struct iv_cand *cand,
3643 comp_cost cost, bitmap inv_vars, tree value,
3644 enum tree_code comp, bitmap inv_exprs)
3646 unsigned i, s;
3648 if (cost.infinite_cost_p ())
3650 BITMAP_FREE (inv_vars);
3651 BITMAP_FREE (inv_exprs);
3652 return;
3655 if (data->consider_all_candidates)
3657 group->cost_map[cand->id].cand = cand;
3658 group->cost_map[cand->id].cost = cost;
3659 group->cost_map[cand->id].inv_vars = inv_vars;
3660 group->cost_map[cand->id].inv_exprs = inv_exprs;
3661 group->cost_map[cand->id].value = value;
3662 group->cost_map[cand->id].comp = comp;
3663 return;
3666 /* n_map_members is a power of two, so this computes modulo. */
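/* E.g. with n_map_members == 8 and cand->id == 13 the search starts at
   slot 13 & 7 == 5 and wraps around to the first slots if needed.  */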
3667 s = cand->id & (group->n_map_members - 1);
3668 for (i = s; i < group->n_map_members; i++)
3669 if (!group->cost_map[i].cand)
3670 goto found;
3671 for (i = 0; i < s; i++)
3672 if (!group->cost_map[i].cand)
3673 goto found;
3675 gcc_unreachable ();
3677 found:
3678 group->cost_map[i].cand = cand;
3679 group->cost_map[i].cost = cost;
3680 group->cost_map[i].inv_vars = inv_vars;
3681 group->cost_map[i].inv_exprs = inv_exprs;
3682 group->cost_map[i].value = value;
3683 group->cost_map[i].comp = comp;
3686 /* Gets cost of (GROUP, CAND) pair. */
3688 static class cost_pair *
3689 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3690 struct iv_cand *cand)
3692 unsigned i, s;
3693 class cost_pair *ret;
3695 if (!cand)
3696 return NULL;
3698 if (data->consider_all_candidates)
3700 ret = group->cost_map + cand->id;
3701 if (!ret->cand)
3702 return NULL;
3704 return ret;
3707 /* n_map_members is a power of two, so this computes modulo. */
3708 s = cand->id & (group->n_map_members - 1);
3709 for (i = s; i < group->n_map_members; i++)
3710 if (group->cost_map[i].cand == cand)
3711 return group->cost_map + i;
3712 else if (group->cost_map[i].cand == NULL)
3713 return NULL;
3714 for (i = 0; i < s; i++)
3715 if (group->cost_map[i].cand == cand)
3716 return group->cost_map + i;
3717 else if (group->cost_map[i].cand == NULL)
3718 return NULL;
3720 return NULL;
3723 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3724 static rtx
3725 produce_memory_decl_rtl (tree obj, int *regno)
3727 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3728 machine_mode address_mode = targetm.addr_space.address_mode (as);
3729 rtx x;
3731 gcc_assert (obj);
3732 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3734 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3735 x = gen_rtx_SYMBOL_REF (address_mode, name);
3736 SET_SYMBOL_REF_DECL (x, obj);
3737 x = gen_rtx_MEM (DECL_MODE (obj), x);
3738 set_mem_addr_space (x, as);
3739 targetm.encode_section_info (obj, x, true);
3741 else
3743 x = gen_raw_REG (address_mode, (*regno)++);
3744 x = gen_rtx_MEM (DECL_MODE (obj), x);
3745 set_mem_addr_space (x, as);
3748 return x;
3751 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3752 walk_tree. DATA points to the next fake register number to use. */
3754 static tree
3755 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3757 tree obj = NULL_TREE;
3758 rtx x = NULL_RTX;
3759 int *regno = (int *) data;
3761 switch (TREE_CODE (*expr_p))
3763 case ADDR_EXPR:
3764 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3765 handled_component_p (*expr_p);
3766 expr_p = &TREE_OPERAND (*expr_p, 0))
3767 continue;
3768 obj = *expr_p;
3769 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3770 x = produce_memory_decl_rtl (obj, regno);
3771 break;
3773 case SSA_NAME:
3774 *ws = 0;
3775 obj = SSA_NAME_VAR (*expr_p);
3776 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3777 if (!obj)
3778 return NULL_TREE;
3779 if (!DECL_RTL_SET_P (obj))
3780 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3781 break;
3783 case VAR_DECL:
3784 case PARM_DECL:
3785 case RESULT_DECL:
3786 *ws = 0;
3787 obj = *expr_p;
3789 if (DECL_RTL_SET_P (obj))
3790 break;
3792 if (DECL_MODE (obj) == BLKmode)
3793 x = produce_memory_decl_rtl (obj, regno);
3794 else
3795 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3797 break;
3799 default:
3800 break;
3803 if (x)
3805 decl_rtl_to_reset.safe_push (obj);
3806 SET_DECL_RTL (obj, x);
3809 return NULL_TREE;
3812 /* Predict whether the given loop will be transformed in the RTL
3813 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3814 This only covers target-independent checks; see targetm.predict_doloop_p
3815 for the target-dependent ones.
3817 Note that according to some initial investigation, some checks like the
3818 costly niter check and invalid stmt scanning don't provide much gain in
3819 general cases, so keep this as simple as possible for now.
3821 Some RTL-specific checks seem impossible to perform on gimple; if any new
3822 checks or easy checks are missing here, please add them. */
3824 static bool
3825 generic_predict_doloop_p (struct ivopts_data *data)
3827 class loop *loop = data->current_loop;
3829 /* Call target hook for target dependent checks. */
3830 if (!targetm.predict_doloop_p (loop))
3832 if (dump_file && (dump_flags & TDF_DETAILS))
3833 fprintf (dump_file, "Predict doloop failure due to"
3834 " target specific checks.\n");
3835 return false;
3838 /* Similar to doloop_optimize, check the iteration description to see whether
3839 it's suitable or not. Keep it as simple as possible; feel free to extend it
3840 if you find that any multiple-exit cases matter. */
3841 edge exit = single_dom_exit (loop);
3842 class tree_niter_desc *niter_desc;
3843 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3845 if (dump_file && (dump_flags & TDF_DETAILS))
3846 fprintf (dump_file, "Predict doloop failure due to"
3847 " unexpected niters.\n");
3848 return false;
3851 /* Similar to doloop_optimize, check whether the iteration count is too
3852 small to be profitable. */
3853 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3854 if (est_niter == -1)
3855 est_niter = get_likely_max_loop_iterations_int (loop);
3856 if (est_niter >= 0 && est_niter < 3)
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3859 fprintf (dump_file,
3860 "Predict doloop failure due to"
3861 " too few iterations (%u).\n",
3862 (unsigned int) est_niter);
3863 return false;
3866 return true;
3869 /* Determines cost of the computation of EXPR. */
3871 static unsigned
3872 computation_cost (tree expr, bool speed)
3874 rtx_insn *seq;
3875 rtx rslt;
3876 tree type = TREE_TYPE (expr);
3877 unsigned cost;
3878 /* Avoid using hard regs in ways which may be unsupported. */
3879 int regno = LAST_VIRTUAL_REGISTER + 1;
3880 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3881 enum node_frequency real_frequency = node->frequency;
3883 node->frequency = NODE_FREQUENCY_NORMAL;
3884 crtl->maybe_hot_insn_p = speed;
3885 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3886 start_sequence ();
3887 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3888 seq = get_insns ();
3889 end_sequence ();
3890 default_rtl_profile ();
3891 node->frequency = real_frequency;
3893 cost = seq_cost (seq, speed);
3894 if (MEM_P (rslt))
3895 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3896 TYPE_ADDR_SPACE (type), speed);
3897 else if (!REG_P (rslt))
3898 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3900 return cost;
3903 /* Returns variable containing the value of candidate CAND at statement AT. */
3905 static tree
3906 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3908 if (stmt_after_increment (loop, cand, stmt))
3909 return cand->var_after;
3910 else
3911 return cand->var_before;
3914 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3915 same precision that is at least as wide as the precision of TYPE, stores
3916 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3917 type of A and B. */
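/* For instance, if *A is (unsigned int) x and *B is (unsigned int) y with
   x and y of a wider 64-bit type, *A and *B are replaced by x and y and that
   64-bit type is returned; otherwise the type of A is returned unchanged.  */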
3919 static tree
3920 determine_common_wider_type (tree *a, tree *b)
3922 tree wider_type = NULL;
3923 tree suba, subb;
3924 tree atype = TREE_TYPE (*a);
3926 if (CONVERT_EXPR_P (*a))
3928 suba = TREE_OPERAND (*a, 0);
3929 wider_type = TREE_TYPE (suba);
3930 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3931 return atype;
3933 else
3934 return atype;
3936 if (CONVERT_EXPR_P (*b))
3938 subb = TREE_OPERAND (*b, 0);
3939 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3940 return atype;
3942 else
3943 return atype;
3945 *a = suba;
3946 *b = subb;
3947 return wider_type;
3950 /* Determines the expression by which USE is expressed from induction variable
3951 CAND at statement AT in LOOP. The expression is stored in two parts in a
3952 decomposed form: the invariant part is stored in AFF_INV, while the variant
3953 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in
3954 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
3956 static bool
3957 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3958 struct iv_cand *cand, class aff_tree *aff_inv,
3959 class aff_tree *aff_var, widest_int *prat = NULL)
3961 tree ubase = use->iv->base, ustep = use->iv->step;
3962 tree cbase = cand->iv->base, cstep = cand->iv->step;
3963 tree common_type, uutype, var, cstep_common;
3964 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3965 aff_tree aff_cbase;
3966 widest_int rat;
3968 /* We must have enough precision to express the values of the use. */
3969 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3970 return false;
3972 var = var_at_stmt (loop, cand, at);
3973 uutype = unsigned_type_for (utype);
3975 /* If the conversion is not a no-op, perform it. */
3976 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3978 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3979 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3981 tree inner_base, inner_step, inner_type;
3982 inner_base = TREE_OPERAND (cbase, 0);
3983 if (CONVERT_EXPR_P (cstep))
3984 inner_step = TREE_OPERAND (cstep, 0);
3985 else
3986 inner_step = cstep;
3988 inner_type = TREE_TYPE (inner_base);
3989 /* If the candidate is added from a biv whose type is smaller than
3990 ctype, we know both the candidate and the biv won't overflow.
3991 In this case, it's safe to skip the conversion in the candidate.
3992 As an example, (unsigned short)((unsigned long)A) equals
3993 (unsigned short)A, if A has a type no larger than short. */
3994 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3996 cbase = inner_base;
3997 cstep = inner_step;
4000 cbase = fold_convert (uutype, cbase);
4001 cstep = fold_convert (uutype, cstep);
4002 var = fold_convert (uutype, var);
4005 /* Ratio is 1 when computing the value of biv cand by itself.
4006 We can't rely on constant_multiple_of in this case because the
4007 use is created after the original biv is selected. The call
4008 could fail because of inconsistent fold behavior. See PR68021
4009 for more information. */
4010 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4012 gcc_assert (is_gimple_assign (use->stmt));
4013 gcc_assert (use->iv->ssa_name == cand->var_after);
4014 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4015 rat = 1;
4017 else if (!constant_multiple_of (ustep, cstep, &rat))
4018 return false;
4020 if (prat)
4021 *prat = rat;
4023 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4024 type, we achieve better folding by computing their difference in this
4025 wider type, and casting the result to UUTYPE. We do not need to worry about
4026 overflows, as all the arithmetic will in the end be performed in UUTYPE
4027 anyway. */
4028 common_type = determine_common_wider_type (&ubase, &cbase);
4030 /* use = ubase - ratio * cbase + ratio * var. */
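/* E.g. for a use with base 10 and step 2 expressed via a candidate with
   base 0 and step 1, the ratio is 2 and the value of the use is
   10 - 2*0 + 2*var, i.e. 10 + 2*var (purely illustrative numbers).  */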
4031 tree_to_aff_combination (ubase, common_type, aff_inv);
4032 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4033 tree_to_aff_combination (var, uutype, aff_var);
4035 /* We need to shift the value if we are after the increment. */
4036 if (stmt_after_increment (loop, cand, at))
4038 aff_tree cstep_aff;
4040 if (common_type != uutype)
4041 cstep_common = fold_convert (common_type, cstep);
4042 else
4043 cstep_common = cstep;
4045 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4046 aff_combination_add (&aff_cbase, &cstep_aff);
4049 aff_combination_scale (&aff_cbase, -rat);
4050 aff_combination_add (aff_inv, &aff_cbase);
4051 if (common_type != uutype)
4052 aff_combination_convert (aff_inv, uutype);
4054 aff_combination_scale (aff_var, rat);
4055 return true;
4058 /* Determines the expression by which USE is expressed from induction variable
4059 CAND at statement AT in LOOP. The expression is stored in a decomposed
4060 form into AFF. Returns false if USE cannot be expressed using CAND. */
4062 static bool
4063 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4064 struct iv_cand *cand, class aff_tree *aff)
4066 aff_tree aff_var;
4068 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4069 return false;
4071 aff_combination_add (aff, &aff_var);
4072 return true;
4075 /* Return the type of USE. */
4077 static tree
4078 get_use_type (struct iv_use *use)
4080 tree base_type = TREE_TYPE (use->iv->base);
4081 tree type;
4083 if (use->type == USE_REF_ADDRESS)
4085 /* The base_type may be a void pointer. Create a pointer type based on
4086 the mem_ref instead. */
4087 type = build_pointer_type (TREE_TYPE (*use->op_p));
4088 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4089 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4091 else
4092 type = base_type;
4094 return type;
4097 /* Determines the expression by which USE is expressed from induction variable
4098 CAND at statement AT in LOOP. The computation is unshared. */
4100 static tree
4101 get_computation_at (class loop *loop, gimple *at,
4102 struct iv_use *use, struct iv_cand *cand)
4104 aff_tree aff;
4105 tree type = get_use_type (use);
4107 if (!get_computation_aff (loop, at, use, cand, &aff))
4108 return NULL_TREE;
4109 unshare_aff_combination (&aff);
4110 return fold_convert (type, aff_combination_to_tree (&aff));
4113 /* Like get_computation_at, but try harder, even if the computation
4114 is more expensive. Intended for debug stmts. */
4116 static tree
4117 get_debug_computation_at (class loop *loop, gimple *at,
4118 struct iv_use *use, struct iv_cand *cand)
4120 if (tree ret = get_computation_at (loop, at, use, cand))
4121 return ret;
4123 tree ubase = use->iv->base, ustep = use->iv->step;
4124 tree cbase = cand->iv->base, cstep = cand->iv->step;
4125 tree var;
4126 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4127 widest_int rat;
4129 /* We must have a precision to express the values of use. */
4130 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4131 return NULL_TREE;
4133 /* Try to handle the case that get_computation_at doesn't handle:
4134 try to express
4135 use = ubase + (var - cbase) / ratio. */
4136 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4137 &rat))
4138 return NULL_TREE;
4140 bool neg_p = false;
4141 if (wi::neg_p (rat))
4143 if (TYPE_UNSIGNED (ctype))
4144 return NULL_TREE;
4145 neg_p = true;
4146 rat = wi::neg (rat);
4149 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4150 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4151 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4152 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4153 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4154 if (!use->iv->no_overflow
4155 && !cand->iv->no_overflow
4156 && !integer_pow2p (cstep))
4157 return NULL_TREE;
4159 int bits = wi::exact_log2 (rat);
4160 if (bits == -1)
4161 bits = wi::floor_log2 (rat) + 1;
4162 if (!cand->iv->no_overflow
4163 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4164 return NULL_TREE;
4166 var = var_at_stmt (loop, cand, at);
4168 if (POINTER_TYPE_P (ctype))
4170 ctype = unsigned_type_for (ctype);
4171 cbase = fold_convert (ctype, cbase);
4172 cstep = fold_convert (ctype, cstep);
4173 var = fold_convert (ctype, var);
4176 if (stmt_after_increment (loop, cand, at))
4177 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4178 unshare_expr (cstep));
4180 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4181 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4182 wide_int_to_tree (TREE_TYPE (var), rat));
4183 if (POINTER_TYPE_P (utype))
4185 var = fold_convert (sizetype, var);
4186 if (neg_p)
4187 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4188 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4190 else
4192 var = fold_convert (utype, var);
4193 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4194 ubase, var);
4196 return var;
4199 /* Adjust the cost COST for being in loop setup rather than loop body.
4200 If we're optimizing for space, the loop setup overhead is constant;
4201 if we're optimizing for speed, amortize it over the per-iteration cost.
4202 If ROUND_UP_P is true, the result is rounded up rather than truncated
4203 toward zero when optimizing for speed. */
4204 static int64_t
4205 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4206 bool round_up_p = false)
4208 if (cost == INFTY)
4209 return cost;
4210 else if (optimize_loop_for_speed_p (data->current_loop))
4212 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4213 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4215 else
4216 return cost;
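/* A worked example, assuming avg_loop_niter returns 10: a setup cost of 25
   becomes 25 / 10 = 2 when optimizing the loop for speed (3 with ROUND_UP_P,
   since (25 + 9) / 10 = 3), and stays 25 when optimizing for size.  */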
4219 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4220 EXPR operand holding the shift. COST0 and COST1 are the costs for
4221 calculating the operands of EXPR. Returns true if successful, and returns
4222 the cost in COST. */
4224 static bool
4225 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4226 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4228 comp_cost res;
4229 tree op1 = TREE_OPERAND (expr, 1);
4230 tree cst = TREE_OPERAND (mult, 1);
4231 tree multop = TREE_OPERAND (mult, 0);
4232 int m = exact_log2 (int_cst_value (cst));
4233 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4234 int as_cost, sa_cost;
4235 bool mult_in_op1;
4237 if (!(m >= 0 && m < maxm))
4238 return false;
4240 STRIP_NOPS (op1);
4241 mult_in_op1 = operand_equal_p (op1, mult, 0);
4243 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4245 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4246 use that in preference to a shift insn followed by an add insn. */
4247 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4248 ? shiftadd_cost (speed, mode, m)
4249 : (mult_in_op1
4250 ? shiftsub1_cost (speed, mode, m)
4251 : shiftsub0_cost (speed, mode, m)));
4253 res = comp_cost (MIN (as_cost, sa_cost), 0);
4254 res += (mult_in_op1 ? cost0 : cost1);
4256 STRIP_NOPS (multop);
4257 if (!is_gimple_val (multop))
4258 res += force_expr_to_var_cost (multop, speed);
4260 *cost = res;
4261 return true;
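/* For illustration: for EXPR = a + b * 4, MULT is the b * 4 operand and
   m == 2, so the cost of a separate shift followed by an add is compared
   against the target's shift-and-add cost and the cheaper one is returned,
   plus the cost of computing the non-multiplied operand.  */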
4264 /* Estimates cost of forcing expression EXPR into a variable. */
4266 static comp_cost
4267 force_expr_to_var_cost (tree expr, bool speed)
4269 static bool costs_initialized = false;
4270 static unsigned integer_cost [2];
4271 static unsigned symbol_cost [2];
4272 static unsigned address_cost [2];
4273 tree op0, op1;
4274 comp_cost cost0, cost1, cost;
4275 machine_mode mode;
4276 scalar_int_mode int_mode;
4278 if (!costs_initialized)
4280 tree type = build_pointer_type (integer_type_node);
4281 tree var, addr;
4282 rtx x;
4283 int i;
4285 var = create_tmp_var_raw (integer_type_node, "test_var");
4286 TREE_STATIC (var) = 1;
4287 x = produce_memory_decl_rtl (var, NULL);
4288 SET_DECL_RTL (var, x);
4290 addr = build1 (ADDR_EXPR, type, var);
4293 for (i = 0; i < 2; i++)
4295 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4296 2000), i);
4298 symbol_cost[i] = computation_cost (addr, i) + 1;
4300 address_cost[i]
4301 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4302 if (dump_file && (dump_flags & TDF_DETAILS))
4304 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4305 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4306 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4307 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4308 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4309 fprintf (dump_file, "\n");
4313 costs_initialized = true;
4316 STRIP_NOPS (expr);
4318 if (SSA_VAR_P (expr))
4319 return no_cost;
4321 if (is_gimple_min_invariant (expr))
4323 if (poly_int_tree_p (expr))
4324 return comp_cost (integer_cost [speed], 0);
4326 if (TREE_CODE (expr) == ADDR_EXPR)
4328 tree obj = TREE_OPERAND (expr, 0);
4330 if (VAR_P (obj)
4331 || TREE_CODE (obj) == PARM_DECL
4332 || TREE_CODE (obj) == RESULT_DECL)
4333 return comp_cost (symbol_cost [speed], 0);
4336 return comp_cost (address_cost [speed], 0);
4339 switch (TREE_CODE (expr))
4341 case POINTER_PLUS_EXPR:
4342 case PLUS_EXPR:
4343 case MINUS_EXPR:
4344 case MULT_EXPR:
4345 case TRUNC_DIV_EXPR:
4346 case BIT_AND_EXPR:
4347 case BIT_IOR_EXPR:
4348 case LSHIFT_EXPR:
4349 case RSHIFT_EXPR:
4350 op0 = TREE_OPERAND (expr, 0);
4351 op1 = TREE_OPERAND (expr, 1);
4352 STRIP_NOPS (op0);
4353 STRIP_NOPS (op1);
4354 break;
4356 CASE_CONVERT:
4357 case NEGATE_EXPR:
4358 case BIT_NOT_EXPR:
4359 op0 = TREE_OPERAND (expr, 0);
4360 STRIP_NOPS (op0);
4361 op1 = NULL_TREE;
4362 break;
4363 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4364 introduce a COND_EXPR for the IV base, so we need to support better cost
4365 estimation for this COND_EXPR and for tcc_comparison codes. */
4366 case COND_EXPR:
4367 op0 = TREE_OPERAND (expr, 1);
4368 STRIP_NOPS (op0);
4369 op1 = TREE_OPERAND (expr, 2);
4370 STRIP_NOPS (op1);
4371 break;
4372 case LT_EXPR:
4373 case LE_EXPR:
4374 case GT_EXPR:
4375 case GE_EXPR:
4376 case EQ_EXPR:
4377 case NE_EXPR:
4378 case UNORDERED_EXPR:
4379 case ORDERED_EXPR:
4380 case UNLT_EXPR:
4381 case UNLE_EXPR:
4382 case UNGT_EXPR:
4383 case UNGE_EXPR:
4384 case UNEQ_EXPR:
4385 case LTGT_EXPR:
4386 case MAX_EXPR:
4387 case MIN_EXPR:
4388 op0 = TREE_OPERAND (expr, 0);
4389 STRIP_NOPS (op0);
4390 op1 = TREE_OPERAND (expr, 1);
4391 STRIP_NOPS (op1);
4392 break;
4394 default:
4395 /* Just an arbitrary value, FIXME. */
4396 return comp_cost (target_spill_cost[speed], 0);
4399 if (op0 == NULL_TREE
4400 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4401 cost0 = no_cost;
4402 else
4403 cost0 = force_expr_to_var_cost (op0, speed);
4405 if (op1 == NULL_TREE
4406 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4407 cost1 = no_cost;
4408 else
4409 cost1 = force_expr_to_var_cost (op1, speed);
4411 mode = TYPE_MODE (TREE_TYPE (expr));
4412 switch (TREE_CODE (expr))
4414 case POINTER_PLUS_EXPR:
4415 case PLUS_EXPR:
4416 case MINUS_EXPR:
4417 case NEGATE_EXPR:
4418 cost = comp_cost (add_cost (speed, mode), 0);
4419 if (TREE_CODE (expr) != NEGATE_EXPR)
4421 tree mult = NULL_TREE;
4422 comp_cost sa_cost;
4423 if (TREE_CODE (op1) == MULT_EXPR)
4424 mult = op1;
4425 else if (TREE_CODE (op0) == MULT_EXPR)
4426 mult = op0;
4428 if (mult != NULL_TREE
4429 && is_a <scalar_int_mode> (mode, &int_mode)
4430 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4431 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4432 speed, &sa_cost))
4433 return sa_cost;
4435 break;
4437 CASE_CONVERT:
4439 tree inner_mode, outer_mode;
4440 outer_mode = TREE_TYPE (expr);
4441 inner_mode = TREE_TYPE (op0);
4442 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4443 TYPE_MODE (inner_mode), speed), 0);
4445 break;
4447 case MULT_EXPR:
4448 if (cst_and_fits_in_hwi (op0))
4449 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4450 mode, speed), 0);
4451 else if (cst_and_fits_in_hwi (op1))
4452 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4453 mode, speed), 0);
4454 else
4455 return comp_cost (target_spill_cost [speed], 0);
4456 break;
4458 case TRUNC_DIV_EXPR:
4459 /* Division by power of two is usually cheap, so we allow it. Forbid
4460 anything else. */
4461 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4462 cost = comp_cost (add_cost (speed, mode), 0);
4463 else
4464 cost = comp_cost (target_spill_cost[speed], 0);
4465 break;
4467 case BIT_AND_EXPR:
4468 case BIT_IOR_EXPR:
4469 case BIT_NOT_EXPR:
4470 case LSHIFT_EXPR:
4471 case RSHIFT_EXPR:
4472 cost = comp_cost (add_cost (speed, mode), 0);
4473 break;
4474 case COND_EXPR:
4475 op0 = TREE_OPERAND (expr, 0);
4476 STRIP_NOPS (op0);
4477 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4478 || CONSTANT_CLASS_P (op0))
4479 cost = no_cost;
4480 else
4481 cost = force_expr_to_var_cost (op0, speed);
4482 break;
4483 case LT_EXPR:
4484 case LE_EXPR:
4485 case GT_EXPR:
4486 case GE_EXPR:
4487 case EQ_EXPR:
4488 case NE_EXPR:
4489 case UNORDERED_EXPR:
4490 case ORDERED_EXPR:
4491 case UNLT_EXPR:
4492 case UNLE_EXPR:
4493 case UNGT_EXPR:
4494 case UNGE_EXPR:
4495 case UNEQ_EXPR:
4496 case LTGT_EXPR:
4497 case MAX_EXPR:
4498 case MIN_EXPR:
4499 /* Simply use the add cost for now; FIXME: replace this with a more
4500 accurate cost evaluation if one becomes available. */
4501 cost = comp_cost (add_cost (speed, mode), 0);
4502 break;
4504 default:
4505 gcc_unreachable ();
4508 cost += cost0;
4509 cost += cost1;
4510 return cost;
4513 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4514 invariants the computation depends on. */
4516 static comp_cost
4517 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4519 if (!expr)
4520 return no_cost;
4522 find_inv_vars (data, &expr, inv_vars);
4523 return force_expr_to_var_cost (expr, data->speed);
4526 /* Returns the cost of an auto-modifying address expression of the form
4527 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4528 is the offset of the address expression. The address expression has
4529 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED
4530 selects whether we are optimizing for speed or for size. */
4532 enum ainc_type
4534 AINC_PRE_INC, /* Pre increment. */
4535 AINC_PRE_DEC, /* Pre decrement. */
4536 AINC_POST_INC, /* Post increment. */
4537 AINC_POST_DEC, /* Post decrement. */
4538 AINC_NONE /* Also the number of auto increment types. */
4541 struct ainc_cost_data
4543 int64_t costs[AINC_NONE];
4546 static comp_cost
4547 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4548 machine_mode addr_mode, machine_mode mem_mode,
4549 addr_space_t as, bool speed)
4551 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4552 && !USE_STORE_PRE_DECREMENT (mem_mode)
4553 && !USE_LOAD_POST_DECREMENT (mem_mode)
4554 && !USE_STORE_POST_DECREMENT (mem_mode)
4555 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4556 && !USE_STORE_PRE_INCREMENT (mem_mode)
4557 && !USE_LOAD_POST_INCREMENT (mem_mode)
4558 && !USE_STORE_POST_INCREMENT (mem_mode))
4559 return infinite_cost;
4561 static vec<ainc_cost_data *> ainc_cost_data_list;
4562 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4563 if (idx >= ainc_cost_data_list.length ())
4565 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4567 gcc_assert (nsize > idx);
4568 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4571 ainc_cost_data *data = ainc_cost_data_list[idx];
4572 if (data == NULL)
4574 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4576 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4577 data->costs[AINC_PRE_DEC] = INFTY;
4578 data->costs[AINC_POST_DEC] = INFTY;
4579 data->costs[AINC_PRE_INC] = INFTY;
4580 data->costs[AINC_POST_INC] = INFTY;
4581 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4582 || USE_STORE_PRE_DECREMENT (mem_mode))
4584 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4586 if (memory_address_addr_space_p (mem_mode, addr, as))
4587 data->costs[AINC_PRE_DEC]
4588 = address_cost (addr, mem_mode, as, speed);
4590 if (USE_LOAD_POST_DECREMENT (mem_mode)
4591 || USE_STORE_POST_DECREMENT (mem_mode))
4593 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4595 if (memory_address_addr_space_p (mem_mode, addr, as))
4596 data->costs[AINC_POST_DEC]
4597 = address_cost (addr, mem_mode, as, speed);
4599 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4600 || USE_STORE_PRE_INCREMENT (mem_mode))
4602 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4604 if (memory_address_addr_space_p (mem_mode, addr, as))
4605 data->costs[AINC_PRE_INC]
4606 = address_cost (addr, mem_mode, as, speed);
4608 if (USE_LOAD_POST_INCREMENT (mem_mode)
4609 || USE_STORE_POST_INCREMENT (mem_mode))
4611 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4613 if (memory_address_addr_space_p (mem_mode, addr, as))
4614 data->costs[AINC_POST_INC]
4615 = address_cost (addr, mem_mode, as, speed);
4617 ainc_cost_data_list[idx] = data;
4620 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4621 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4622 return comp_cost (data->costs[AINC_POST_INC], 0);
4623 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4624 return comp_cost (data->costs[AINC_POST_DEC], 0);
4625 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4626 return comp_cost (data->costs[AINC_PRE_INC], 0);
4627 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4628 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4630 return infinite_cost;
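/* For illustration, with a 4-byte MEM_MODE: step 4 with offset 0 maps to the
   post-increment cost, step -4 with offset 0 to post-decrement, step 4 with
   offset 4 to pre-increment, and step -4 with offset -4 to pre-decrement;
   any other combination is considered infinitely expensive here.  */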
4633 /* Return the cost of computing USE's address expression by using CAND.
4634 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4635 address expression, respectively. If AFF_INV is simple, store
4636 the loop invariant variables it depends on in INV_VARS;
4637 if AFF_INV is complicated, handle it as a new invariant expression
4638 and record it in INV_EXPR. RATIO is the ratio between the
4639 steps of USE and CAND. If CAN_AUTOINC is non-NULL, store in it a
4640 boolean value indicating whether this is an auto-increment address. */
4642 static comp_cost
4643 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4644 struct iv_cand *cand, aff_tree *aff_inv,
4645 aff_tree *aff_var, HOST_WIDE_INT ratio,
4646 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4647 bool *can_autoinc, bool speed)
4649 rtx addr;
4650 bool simple_inv = true;
4651 tree comp_inv = NULL_TREE, type = aff_var->type;
4652 comp_cost var_cost = no_cost, cost = no_cost;
4653 struct mem_address parts = {NULL_TREE, integer_one_node,
4654 NULL_TREE, NULL_TREE, NULL_TREE};
4655 machine_mode addr_mode = TYPE_MODE (type);
4656 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4657 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4658 /* Only true if ratio != 1. */
4659 bool ok_with_ratio_p = false;
4660 bool ok_without_ratio_p = false;
4662 if (!aff_combination_const_p (aff_inv))
4664 parts.index = integer_one_node;
4665 /* Addressing mode "base + index". */
4666 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4667 if (ratio != 1)
4669 parts.step = wide_int_to_tree (type, ratio);
4670 /* Addressing mode "base + index << scale". */
4671 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4672 if (!ok_with_ratio_p)
4673 parts.step = NULL_TREE;
4675 if (ok_with_ratio_p || ok_without_ratio_p)
4677 if (maybe_ne (aff_inv->offset, 0))
4679 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4680 /* Addressing mode "base + index [<< scale] + offset". */
4681 if (!valid_mem_ref_p (mem_mode, as, &parts))
4682 parts.offset = NULL_TREE;
4683 else
4684 aff_inv->offset = 0;
4687 move_fixed_address_to_symbol (&parts, aff_inv);
4688 /* Base is fixed address and is moved to symbol part. */
4689 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4690 parts.base = NULL_TREE;
4692 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4693 if (parts.symbol != NULL_TREE
4694 && !valid_mem_ref_p (mem_mode, as, &parts))
4696 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4697 parts.symbol = NULL_TREE;
4698 /* Reset SIMPLE_INV since symbol address needs to be computed
4699 outside of address expression in this case. */
4700 simple_inv = false;
4701 /* Symbol part is moved back to base part; it can't be NULL. */
4702 parts.base = integer_one_node;
4705 else
4706 parts.index = NULL_TREE;
4708 else
4710 poly_int64 ainc_step;
4711 if (can_autoinc
4712 && ratio == 1
4713 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4715 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4717 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4718 ainc_offset += ainc_step;
4719 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4720 addr_mode, mem_mode, as, speed);
4721 if (!cost.infinite_cost_p ())
4723 *can_autoinc = true;
4724 return cost;
4726 cost = no_cost;
4728 if (!aff_combination_zero_p (aff_inv))
4730 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4731 /* Addressing mode "base + offset". */
4732 if (!valid_mem_ref_p (mem_mode, as, &parts))
4733 parts.offset = NULL_TREE;
4734 else
4735 aff_inv->offset = 0;
4739 if (simple_inv)
4740 simple_inv = (aff_inv == NULL
4741 || aff_combination_const_p (aff_inv)
4742 || aff_combination_singleton_var_p (aff_inv));
4743 if (!aff_combination_zero_p (aff_inv))
4744 comp_inv = aff_combination_to_tree (aff_inv);
4745 if (comp_inv != NULL_TREE)
4746 cost = force_var_cost (data, comp_inv, inv_vars);
4747 if (ratio != 1 && parts.step == NULL_TREE)
4748 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4749 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4750 var_cost += add_cost (speed, addr_mode);
4752 if (comp_inv && inv_expr && !simple_inv)
4754 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4755 /* Clear depends on. */
4756 if (*inv_expr != NULL && inv_vars && *inv_vars)
4757 bitmap_clear (*inv_vars);
4759 /* The cost of a small invariant expression adjusted against loop niters
4760 is usually zero, which makes it hard to differentiate from a
4761 candidate based on loop invariant variables. Secondly, the
4762 generated invariant expression may not be hoisted out of the loop by
4763 a following pass. We penalize the cost by rounding up in order to
4764 neutralize such effects. */
4765 cost.cost = adjust_setup_cost (data, cost.cost, true);
4766 cost.scratch = cost.cost;
4769 cost += var_cost;
4770 addr = addr_for_mem_ref (&parts, as, false);
4771 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4772 cost += address_cost (addr, mem_mode, as, speed);
4774 if (parts.symbol != NULL_TREE)
4775 cost.complexity += 1;
4776 /* Don't increase the complexity of adding a scaled index if it's
4777 the only kind of index that the target allows. */
4778 if (parts.step != NULL_TREE && ok_without_ratio_p)
4779 cost.complexity += 1;
4780 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4781 cost.complexity += 1;
4782 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4783 cost.complexity += 1;
4785 return cost;
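/* As a rough illustration of the complexity accounting above: an address of
   the form symbol + base + index << scale + offset can accumulate a
   complexity of up to 4, while a simple base + offset address adds at most 1
   for a nonzero offset.  */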
4788 /* Scale (multiply) the computed COST (except the scratch part that should
4789 be hoisted out of the loop) by header->frequency / AT->frequency, which
4790 makes the expected cost more accurate. */
4792 static comp_cost
4793 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4795 if (data->speed
4796 && data->current_loop->header->count.to_frequency (cfun) > 0)
4798 basic_block bb = gimple_bb (at);
4799 gcc_assert (cost.scratch <= cost.cost);
4800 int scale_factor = (int)(intptr_t) bb->aux;
4801 if (scale_factor == 1)
4802 return cost;
4804 int64_t scaled_cost
4805 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4807 if (dump_file && (dump_flags & TDF_DETAILS))
4808 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4809 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4810 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4812 cost.cost = scaled_cost;
4815 return cost;
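/* A small numeric example: with cost 10, scratch 4 and a block scale factor
   of 3, the scaled cost is 4 + (10 - 4) * 3 = 22; the scratch (setup) part
   is deliberately left unscaled.  */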
4818 /* Determines the cost of the computation by which USE is expressed
4819 from induction variable CAND. If ADDRESS_P is true, we just need
4820 to create an address from it, otherwise we want to get it into a
4821 register. A set of invariants we depend on is stored in INV_VARS.
4822 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4823 addressing is likely. If INV_EXPR is nonnull, record invariant
4824 expr entry in it. */
4826 static comp_cost
4827 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4828 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4829 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4831 gimple *at = use->stmt;
4832 tree ubase = use->iv->base, cbase = cand->iv->base;
4833 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4834 tree comp_inv = NULL_TREE;
4835 HOST_WIDE_INT ratio, aratio;
4836 comp_cost cost;
4837 widest_int rat;
4838 aff_tree aff_inv, aff_var;
4839 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4841 if (inv_vars)
4842 *inv_vars = NULL;
4843 if (can_autoinc)
4844 *can_autoinc = false;
4845 if (inv_expr)
4846 *inv_expr = NULL;
4848 /* Check if we have enough precision to express the values of use. */
4849 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4850 return infinite_cost;
4852 if (address_p
4853 || (use->iv->base_object
4854 && cand->iv->base_object
4855 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4856 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4858 /* Do not try to express address of an object with computation based
4859 on address of a different object. This may cause problems in rtl
4860 level alias analysis (that does not expect this to be happening,
4861 as this is illegal in C), and would be unlikely to be useful
4862 anyway. */
4863 if (use->iv->base_object
4864 && cand->iv->base_object
4865 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4866 return infinite_cost;
4869 if (!get_computation_aff_1 (data->current_loop, at, use,
4870 cand, &aff_inv, &aff_var, &rat)
4871 || !wi::fits_shwi_p (rat))
4872 return infinite_cost;
4874 ratio = rat.to_shwi ();
4875 if (address_p)
4877 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4878 inv_vars, inv_expr, can_autoinc, speed);
4879 cost = get_scaled_computation_cost_at (data, at, cost);
4880 /* For doloop IV cand, add on the extra cost. */
4881 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4882 return cost;
4885 bool simple_inv = (aff_combination_const_p (&aff_inv)
4886 || aff_combination_singleton_var_p (&aff_inv));
4887 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4888 aff_combination_convert (&aff_inv, signed_type);
4889 if (!aff_combination_zero_p (&aff_inv))
4890 comp_inv = aff_combination_to_tree (&aff_inv);
4892 cost = force_var_cost (data, comp_inv, inv_vars);
4893 if (comp_inv && inv_expr && !simple_inv)
4895 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4896 /* Clear depends on. */
4897 if (*inv_expr != NULL && inv_vars && *inv_vars)
4898 bitmap_clear (*inv_vars);
4900 cost.cost = adjust_setup_cost (data, cost.cost);
4901 /* Record setup cost in scratch field. */
4902 cost.scratch = cost.cost;
4904 /* The cost of a constant integer can be covered when adding the invariant
4905 part to the variant part. */
4906 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4907 cost = no_cost;
4909 /* Need type narrowing to represent use with cand. */
4910 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4912 machine_mode outer_mode = TYPE_MODE (utype);
4913 machine_mode inner_mode = TYPE_MODE (ctype);
4914 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4917 /* Turn a + i * (-c) into a - i * c. */
4918 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4919 aratio = -ratio;
4920 else
4921 aratio = ratio;
4923 if (ratio != 1)
4924 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4926 /* TODO: We may also need to check if we can compute a + i * 4 in one
4927 instruction. */
4928 /* Need to add up the invariant and variant parts. */
4929 if (comp_inv && !integer_zerop (comp_inv))
4930 cost += add_cost (speed, TYPE_MODE (utype));
4932 cost = get_scaled_computation_cost_at (data, at, cost);
4934 /* For doloop IV cand, add on the extra cost. */
4935 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4936 cost += targetm.doloop_cost_for_generic;
4938 return cost;
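/* Sketch of the non-address path above for a use a + 4*i expressed with a
   candidate iv i (ratio 4, invariant part a): the cost is roughly the
   setup-adjusted cost of forcing a into a register, plus mult_by_coeff_cost
   for the ratio 4, plus one addition, all finally scaled by the relative
   frequency of the use's basic block.  */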
4941 /* Determines cost of computing the use in GROUP with CAND in a generic
4942 expression. */
4944 static bool
4945 determine_group_iv_cost_generic (struct ivopts_data *data,
4946 struct iv_group *group, struct iv_cand *cand)
4948 comp_cost cost;
4949 iv_inv_expr_ent *inv_expr = NULL;
4950 bitmap inv_vars = NULL, inv_exprs = NULL;
4951 struct iv_use *use = group->vuses[0];
4953 /* The simple case first -- if we need to express the value of the preserved
4954 original biv, the cost is 0. This also prevents us from counting the
4955 cost of increment twice -- once at this use and once in the cost of
4956 the candidate. */
4957 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4958 cost = no_cost;
4959 else
4960 cost = get_computation_cost (data, use, cand, false,
4961 &inv_vars, NULL, &inv_expr);
4963 if (inv_expr)
4965 inv_exprs = BITMAP_ALLOC (NULL);
4966 bitmap_set_bit (inv_exprs, inv_expr->id);
4968 set_group_iv_cost (data, group, cand, cost, inv_vars,
4969 NULL_TREE, ERROR_MARK, inv_exprs);
4970 return !cost.infinite_cost_p ();
4973 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4975 static bool
4976 determine_group_iv_cost_address (struct ivopts_data *data,
4977 struct iv_group *group, struct iv_cand *cand)
4979 unsigned i;
4980 bitmap inv_vars = NULL, inv_exprs = NULL;
4981 bool can_autoinc;
4982 iv_inv_expr_ent *inv_expr = NULL;
4983 struct iv_use *use = group->vuses[0];
4984 comp_cost sum_cost = no_cost, cost;
4986 cost = get_computation_cost (data, use, cand, true,
4987 &inv_vars, &can_autoinc, &inv_expr);
4989 if (inv_expr)
4991 inv_exprs = BITMAP_ALLOC (NULL);
4992 bitmap_set_bit (inv_exprs, inv_expr->id);
4994 sum_cost = cost;
4995 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4997 if (can_autoinc)
4998 sum_cost -= cand->cost_step;
4999 /* If we generated the candidate solely for exploiting autoincrement
5000 opportunities, and it turns out it can't be used, set the cost to
5001 infinity to make sure we ignore it. */
5002 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5003 sum_cost = infinite_cost;
5006 /* Uses in a group can share setup code, so only add setup cost once. */
5007 cost -= cost.scratch;
5008 /* Compute and add costs for the rest of the uses in this group. */
5009 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5011 struct iv_use *next = group->vuses[i];
5013 /* TODO: We could skip computing cost for sub iv_use when it has the
5014 same cost as the first iv_use, but the cost really depends on the
5015 offset and where the iv_use is. */
5016 cost = get_computation_cost (data, next, cand, true,
5017 NULL, &can_autoinc, &inv_expr);
5018 if (inv_expr)
5020 if (!inv_exprs)
5021 inv_exprs = BITMAP_ALLOC (NULL);
5023 bitmap_set_bit (inv_exprs, inv_expr->id);
5025 sum_cost += cost;
5027 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5028 NULL_TREE, ERROR_MARK, inv_exprs);
5030 return !sum_cost.infinite_cost_p ();
5033 /* Computes the value of candidate CAND at position AT in iteration
5034 DESC->NITER, and stores it in VAL. */
5036 static void
5037 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5038 class tree_niter_desc *desc, aff_tree *val)
5040 aff_tree step, delta, nit;
5041 struct iv *iv = cand->iv;
5042 tree type = TREE_TYPE (iv->base);
5043 tree niter = desc->niter;
5044 bool after_adjust = stmt_after_increment (loop, cand, at);
5045 tree steptype;
5047 if (POINTER_TYPE_P (type))
5048 steptype = sizetype;
5049 else
5050 steptype = unsigned_type_for (type);
5052 /* If AFTER_ADJUST is required, the code below generates the equivalent
5053 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5054 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5055 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5056 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5057 class for common idioms that we know are safe. */
5058 if (after_adjust
5059 && desc->control.no_overflow
5060 && integer_onep (desc->control.step)
5061 && (desc->cmp == LT_EXPR
5062 || desc->cmp == NE_EXPR)
5063 && TREE_CODE (desc->bound) == SSA_NAME)
5065 if (integer_onep (desc->control.base))
5067 niter = desc->bound;
5068 after_adjust = false;
5070 else if (TREE_CODE (niter) == MINUS_EXPR
5071 && integer_onep (TREE_OPERAND (niter, 1)))
5073 niter = TREE_OPERAND (niter, 0);
5074 after_adjust = false;
5078 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5079 aff_combination_convert (&step, steptype);
5080 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5081 aff_combination_convert (&nit, steptype);
5082 aff_combination_mult (&nit, &step, &delta);
5083 if (after_adjust)
5084 aff_combination_add (&delta, &step);
5086 tree_to_aff_combination (iv->base, type, val);
5087 if (!POINTER_TYPE_P (type))
5088 aff_combination_convert (val, steptype);
5089 aff_combination_add (val, &delta);
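/* In effect VAL becomes BASE + NITER * STEP, plus one extra STEP when AT is
   after the candidate's increment; e.g. base 0, step 4 and niter 10 evaluated
   after the increment give 0 + 10 * 4 + 4 = 44.  */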
5092 /* Returns period of induction variable iv. */
5094 static tree
5095 iv_period (struct iv *iv)
5097 tree step = iv->step, period, type;
5098 tree pow2div;
5100 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5102 type = unsigned_type_for (TREE_TYPE (step));
5103 /* Period of the iv is lcm (step, type_range)/step - 1,
5104 i.e., N*type_range/step - 1. Since type_range is a power
5105 of two, N == (step >> num_of_ending_zeros_binary (step)),
5106 so the final result is
5108 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
5111 pow2div = num_ending_zeros (step);
5113 period = build_low_bits_mask (type,
5114 (TYPE_PRECISION (type)
5115 - tree_to_uhwi (pow2div)));
5117 return period;
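/* E.g. for an 8-bit unsigned iv with step 4 the step has two trailing zero
   bits, so the period is (1 << (8 - 2)) - 1 = 63, i.e. the iv takes 64
   distinct values before it wraps back to a previously seen value.  */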
5120 /* Returns the comparison operator used when eliminating the iv USE. */
5122 static enum tree_code
5123 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5125 class loop *loop = data->current_loop;
5126 basic_block ex_bb;
5127 edge exit;
5129 ex_bb = gimple_bb (use->stmt);
5130 exit = EDGE_SUCC (ex_bb, 0);
5131 if (flow_bb_inside_loop_p (loop, exit->dest))
5132 exit = EDGE_SUCC (ex_bb, 1);
5134 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5137 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5138 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5139 calculation is performed in a non-wrapping type.
5141 TODO: More generally, we could test for the situation that
5142 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5143 This would require knowing the sign of OFFSET. */
5145 static bool
5146 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5148 enum tree_code code;
5149 tree e1, e2;
5150 aff_tree aff_e1, aff_e2, aff_offset;
5152 if (!nowrap_type_p (TREE_TYPE (base)))
5153 return false;
5155 base = expand_simple_operations (base);
5157 if (TREE_CODE (base) == SSA_NAME)
5159 gimple *stmt = SSA_NAME_DEF_STMT (base);
5161 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5162 return false;
5164 code = gimple_assign_rhs_code (stmt);
5165 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5166 return false;
5168 e1 = gimple_assign_rhs1 (stmt);
5169 e2 = gimple_assign_rhs2 (stmt);
5171 else
5173 code = TREE_CODE (base);
5174 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5175 return false;
5176 e1 = TREE_OPERAND (base, 0);
5177 e2 = TREE_OPERAND (base, 1);
5180 /* Use affine expansion as deeper inspection to prove the equality. */
5181 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5182 &aff_e2, &data->name_expansion_cache);
5183 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5184 &aff_offset, &data->name_expansion_cache);
5185 aff_combination_scale (&aff_offset, -1);
5186 switch (code)
5188 case PLUS_EXPR:
5189 aff_combination_add (&aff_e2, &aff_offset);
5190 if (aff_combination_zero_p (&aff_e2))
5191 return true;
5193 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5194 &aff_e1, &data->name_expansion_cache);
5195 aff_combination_add (&aff_e1, &aff_offset);
5196 return aff_combination_zero_p (&aff_e1);
5198 case POINTER_PLUS_EXPR:
5199 aff_combination_add (&aff_e2, &aff_offset);
5200 return aff_combination_zero_p (&aff_e2);
5202 default:
5203 return false;
5207 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5208 comparison with CAND. NITER describes the number of iterations of
5209 the loop. If successful, the comparison in COMP_P is altered accordingly.
5211 We aim to handle the following situation:
5213 sometype *base, *p;
5214 int a, b, i;
5216 i = a;
5217 p = p_0 = base + a;
5221 bla (*p);
5222 p++;
5223 i++;
5225 while (i < b);
5227 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5228 We aim to optimize this to
5230 p = p_0 = base + a;
5233 bla (*p);
5234 p++;
5236 while (p < p_0 - a + b);
5238 This preserves correctness, since the pointer arithmetic does not
5239 overflow. More precisely:
5241 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5242 overflow in computing it or the values of p.
5243 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5244 overflow. To prove this, we use the fact that p_0 = base + a. */
5246 static bool
5247 iv_elimination_compare_lt (struct ivopts_data *data,
5248 struct iv_cand *cand, enum tree_code *comp_p,
5249 class tree_niter_desc *niter)
5251 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5252 class aff_tree nit, tmpa, tmpb;
5253 enum tree_code comp;
5254 HOST_WIDE_INT step;
5256 /* We need to know that the candidate induction variable does not overflow.
5257 While more complex analysis may be used to prove this, for now just
5258 check that the variable appears in the original program and that it
5259 is computed in a type that guarantees no overflows. */
5260 cand_type = TREE_TYPE (cand->iv->base);
5261 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5262 return false;
5264 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5265 the calculation of the BOUND could overflow, making the comparison
5266 invalid. */
5267 if (!data->loop_single_exit_p)
5268 return false;
5270 /* We need to be able to decide whether candidate is increasing or decreasing
5271 in order to choose the right comparison operator. */
5272 if (!cst_and_fits_in_hwi (cand->iv->step))
5273 return false;
5274 step = int_cst_value (cand->iv->step);
5276 /* Check that the number of iterations matches the expected pattern:
5277 a + 1 > b ? 0 : b - a - 1. */
5278 mbz = niter->may_be_zero;
5279 if (TREE_CODE (mbz) == GT_EXPR)
5281 /* Handle a + 1 > b. */
5282 tree op0 = TREE_OPERAND (mbz, 0);
5283 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5285 a = TREE_OPERAND (op0, 0);
5286 b = TREE_OPERAND (mbz, 1);
5288 else
5289 return false;
5291 else if (TREE_CODE (mbz) == LT_EXPR)
5293 tree op1 = TREE_OPERAND (mbz, 1);
5295 /* Handle b < a + 1. */
5296 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5298 a = TREE_OPERAND (op1, 0);
5299 b = TREE_OPERAND (mbz, 0);
5301 else
5302 return false;
5304 else
5305 return false;
5307 /* Expected number of iterations is B - A - 1. Check that it matches
5308 the actual number, i.e., that B - A - NITER = 1. */
5309 tree_to_aff_combination (niter->niter, nit_type, &nit);
5310 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5311 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5312 aff_combination_scale (&nit, -1);
5313 aff_combination_scale (&tmpa, -1);
5314 aff_combination_add (&tmpb, &tmpa);
5315 aff_combination_add (&tmpb, &nit);
5316 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5317 return false;
5319 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5320 overflow. */
5321 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5322 cand->iv->step,
5323 fold_convert (TREE_TYPE (cand->iv->step), a));
5324 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5325 return false;
5327 /* Determine the new comparison operator. */
5328 comp = step < 0 ? GT_EXPR : LT_EXPR;
5329 if (*comp_p == NE_EXPR)
5330 *comp_p = comp;
5331 else if (*comp_p == EQ_EXPR)
5332 *comp_p = invert_tree_comparison (comp, false);
5333 else
5334 gcc_unreachable ();
5336 return true;
5339 /* Check whether it is possible to express the condition in USE by comparison
5340 of candidate CAND. If so, store the value compared with to BOUND, and the
5341 comparison operator to COMP. */
5343 static bool
5344 may_eliminate_iv (struct ivopts_data *data,
5345 struct iv_use *use, struct iv_cand *cand, tree *bound,
5346 enum tree_code *comp)
5348 basic_block ex_bb;
5349 edge exit;
5350 tree period;
5351 class loop *loop = data->current_loop;
5352 aff_tree bnd;
5353 class tree_niter_desc *desc = NULL;
5355 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5356 return false;
5358 /* For now works only for exits that dominate the loop latch.
5359 TODO: extend to other conditions inside loop body. */
5360 ex_bb = gimple_bb (use->stmt);
5361 if (use->stmt != last_stmt (ex_bb)
5362 || gimple_code (use->stmt) != GIMPLE_COND
5363 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5364 return false;
5366 exit = EDGE_SUCC (ex_bb, 0);
5367 if (flow_bb_inside_loop_p (loop, exit->dest))
5368 exit = EDGE_SUCC (ex_bb, 1);
5369 if (flow_bb_inside_loop_p (loop, exit->dest))
5370 return false;
5372 desc = niter_for_exit (data, exit);
5373 if (!desc)
5374 return false;
5376 /* Determine whether we can use the variable to test the exit condition.
5377 This is the case iff the period of the induction variable is greater
5378 than the number of iterations for which the exit condition is true. */
5379 period = iv_period (cand->iv);
5381 /* If the number of iterations is constant, compare against it directly. */
5382 if (TREE_CODE (desc->niter) == INTEGER_CST)
5384 /* See cand_value_at. */
5385 if (stmt_after_increment (loop, cand, use->stmt))
5387 if (!tree_int_cst_lt (desc->niter, period))
5388 return false;
5390 else
5392 if (tree_int_cst_lt (period, desc->niter))
5393 return false;
5397 /* If not, and if this is the only possible exit of the loop, see whether
5398 we can get a conservative estimate on the number of iterations of the
5399 entire loop and compare against that instead. */
5400 else
5402 widest_int period_value, max_niter;
5404 max_niter = desc->max;
5405 if (stmt_after_increment (loop, cand, use->stmt))
5406 max_niter += 1;
5407 period_value = wi::to_widest (period);
5408 if (wi::gtu_p (max_niter, period_value))
5410 /* See if we can take advantage of inferred loop bound
5411 information. */
5412 if (data->loop_single_exit_p)
5414 if (!max_loop_iterations (loop, &max_niter))
5415 return false;
5416 /* The loop bound is already adjusted by adding 1. */
5417 if (wi::gtu_p (max_niter, period_value))
5418 return false;
5420 else
5421 return false;
5425 /* For doloop IV cand, the bound would be zero. It's safe whether
5426 may_be_zero is set or not. */
5427 if (cand->doloop_p)
5429 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5430 *comp = iv_elimination_compare (data, use);
5431 return true;
5434 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5436 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5437 aff_combination_to_tree (&bnd));
5438 *comp = iv_elimination_compare (data, use);
5440 /* It is unlikely that computing the number of iterations using division
5441 would be more profitable than keeping the original induction variable. */
5442 if (expression_expensive_p (*bound))
5443 return false;
5445 /* Sometimes it is possible to handle the situation that the number of
5446 iterations may be zero unless additional assumptions hold, by using <
5447 instead of != in the exit condition.
5449 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5450 base the exit condition on it. However, that is often too
5451 expensive. */
5452 if (!integer_zerop (desc->may_be_zero))
5453 return iv_elimination_compare_lt (data, cand, comp, desc);
5455 return true;
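/* A typical successful case: for a counter iv exiting on i < n and a pointer
   candidate p walking an array, BOUND becomes the value of p in the final
   iteration and COMP the comparison chosen by iv_elimination_compare, so the
   exit test can be rewritten in terms of p alone (subject to the period and
   overflow checks above).  */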
5458 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5459 be copied if it is used in the loop body and DATA->body_includes_call. */
5461 static int
5462 parm_decl_cost (struct ivopts_data *data, tree bound)
5464 tree sbound = bound;
5465 STRIP_NOPS (sbound);
5467 if (TREE_CODE (sbound) == SSA_NAME
5468 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5469 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5470 && data->body_includes_call)
5471 return COSTS_N_INSNS (1);
5473 return 0;
5476 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5478 static bool
5479 determine_group_iv_cost_cond (struct ivopts_data *data,
5480 struct iv_group *group, struct iv_cand *cand)
5482 tree bound = NULL_TREE;
5483 struct iv *cmp_iv;
5484 bitmap inv_exprs = NULL;
5485 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5486 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5487 enum comp_iv_rewrite rewrite_type;
5488 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5489 tree *control_var, *bound_cst;
5490 enum tree_code comp = ERROR_MARK;
5491 struct iv_use *use = group->vuses[0];
5493 /* Extract condition operands. */
5494 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5495 &bound_cst, NULL, &cmp_iv);
5496 gcc_assert (rewrite_type != COMP_IV_NA);
5498 /* Try iv elimination. */
5499 if (rewrite_type == COMP_IV_ELIM
5500 && may_eliminate_iv (data, use, cand, &bound, &comp))
5502 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5503 if (elim_cost.cost == 0)
5504 elim_cost.cost = parm_decl_cost (data, bound);
5505 else if (TREE_CODE (bound) == INTEGER_CST)
5506 elim_cost.cost = 0;
5507 /* If we replace a loop condition 'i < n' with 'p < base + n',
5508 inv_vars_elim will have 'base' and 'n' set, which implies that both
5509 'base' and 'n' will be live during the loop. More likely,
5510 'base + n' will be loop invariant, resulting in only one live value
5511 during the loop. So in that case we clear inv_vars_elim and set
5512 inv_expr_elim instead. */
5513 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5515 inv_expr_elim = get_loop_invariant_expr (data, bound);
5516 bitmap_clear (inv_vars_elim);
5518 /* The bound is a loop invariant, so it will be only computed
5519 once. */
5520 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5523 /* When the condition is a comparison of the candidate IV against
5524 zero, prefer this IV.
5526 TODO: The constant that we're subtracting from the cost should
5527 be target-dependent. This information should be added to the
5528 target costs for each backend. */
5529 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5530 && integer_zerop (*bound_cst)
5531 && (operand_equal_p (*control_var, cand->var_after, 0)
5532 || operand_equal_p (*control_var, cand->var_before, 0)))
5533 elim_cost -= 1;
5535 express_cost = get_computation_cost (data, use, cand, false,
5536 &inv_vars_express, NULL,
5537 &inv_expr_express);
5538 if (cmp_iv != NULL)
5539 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5541 /* Count the cost of the original bound as well. */
5542 bound_cost = force_var_cost (data, *bound_cst, NULL);
5543 if (bound_cost.cost == 0)
5544 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5545 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5546 bound_cost.cost = 0;
5547 express_cost += bound_cost;
5549 /* Choose the better approach, preferring the eliminated IV. */
5550 if (elim_cost <= express_cost)
5552 cost = elim_cost;
5553 inv_vars = inv_vars_elim;
5554 inv_vars_elim = NULL;
5555 inv_expr = inv_expr_elim;
5556 /* For doloop candidate/use pair, adjust to zero cost. */
5557 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5558 cost = no_cost;
5560 else
5562 cost = express_cost;
5563 inv_vars = inv_vars_express;
5564 inv_vars_express = NULL;
5565 bound = NULL_TREE;
5566 comp = ERROR_MARK;
5567 inv_expr = inv_expr_express;
5570 if (inv_expr)
5572 inv_exprs = BITMAP_ALLOC (NULL);
5573 bitmap_set_bit (inv_exprs, inv_expr->id);
5575 set_group_iv_cost (data, group, cand, cost,
5576 inv_vars, bound, comp, inv_exprs);
5578 if (inv_vars_elim)
5579 BITMAP_FREE (inv_vars_elim);
5580 if (inv_vars_express)
5581 BITMAP_FREE (inv_vars_express);
5583 return !cost.infinite_cost_p ();
5586 /* Determines cost of computing uses in GROUP with CAND. Returns false
5587 if the uses cannot be represented with CAND. */
5589 static bool
5590 determine_group_iv_cost (struct ivopts_data *data,
5591 struct iv_group *group, struct iv_cand *cand)
5593 switch (group->type)
5595 case USE_NONLINEAR_EXPR:
5596 return determine_group_iv_cost_generic (data, group, cand);
5598 case USE_REF_ADDRESS:
5599 case USE_PTR_ADDRESS:
5600 return determine_group_iv_cost_address (data, group, cand);
5602 case USE_COMPARE:
5603 return determine_group_iv_cost_cond (data, group, cand);
5605 default:
5606 gcc_unreachable ();
5610 /* Return true if get_computation_cost indicates that autoincrement is
5611 a possibility for the pair of USE and CAND, false otherwise. */
5613 static bool
5614 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5615 struct iv_cand *cand)
5617 if (!address_p (use->type))
5618 return false;
5620 bool can_autoinc = false;
5621 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5622 return can_autoinc;
5625 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5626 use that allows autoincrement, and set their AINC_USE if possible. */
5628 static void
5629 set_autoinc_for_original_candidates (struct ivopts_data *data)
5631 unsigned i, j;
5633 for (i = 0; i < data->vcands.length (); i++)
5635 struct iv_cand *cand = data->vcands[i];
5636 struct iv_use *closest_before = NULL;
5637 struct iv_use *closest_after = NULL;
5638 if (cand->pos != IP_ORIGINAL)
5639 continue;
5641 for (j = 0; j < data->vgroups.length (); j++)
5643 struct iv_group *group = data->vgroups[j];
5644 struct iv_use *use = group->vuses[0];
5645 unsigned uid = gimple_uid (use->stmt);
5647 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5648 continue;
5650 if (uid < gimple_uid (cand->incremented_at)
5651 && (closest_before == NULL
5652 || uid > gimple_uid (closest_before->stmt)))
5653 closest_before = use;
5655 if (uid > gimple_uid (cand->incremented_at)
5656 && (closest_after == NULL
5657 || uid < gimple_uid (closest_after->stmt)))
5658 closest_after = use;
5661 if (closest_before != NULL
5662 && autoinc_possible_for_pair (data, closest_before, cand))
5663 cand->ainc_use = closest_before;
5664 else if (closest_after != NULL
5665 && autoinc_possible_for_pair (data, closest_after, cand))
5666 cand->ainc_use = closest_after;
5670 /* Relate compare use with all candidates. */
5672 static void
5673 relate_compare_use_with_all_cands (struct ivopts_data *data)
5675 unsigned i, count = data->vcands.length ();
5676 for (i = 0; i < data->vgroups.length (); i++)
5678 struct iv_group *group = data->vgroups[i];
5680 if (group->type == USE_COMPARE)
5681 bitmap_set_range (group->related_cands, 0, count);
5685 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5686 doloop iv base from niter: base = niter + 1. */
5688 static tree
5689 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5690 const widest_int &iterations_max)
5692 tree ntype = TREE_TYPE (niter);
5693 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5694 if (!pref_type)
5695 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5696 build_int_cst (ntype, 1));
5698 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5700 int prec = TYPE_PRECISION (ntype);
5701 int pref_prec = TYPE_PRECISION (pref_type);
5703 tree base;
5705 /* Check if PREFERRED_MODE is able to represent niter. */
5706 if (pref_prec > prec
5707 || wi::ltu_p (iterations_max,
5708 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5709 UNSIGNED)))
5711 /* No wrap; it is safe to use the preferred type after computing niter + 1. */
5712 if (wi::ltu_p (iterations_max,
5713 widest_int::from (wi::max_value (prec, UNSIGNED),
5714 UNSIGNED)))
5716 /* This could help to optimize "-1 +1" pair when niter looks
5717 like "n-1": n is in original mode. "base = (n - 1) + 1"
5718 in PREFERRED_MODE: it could be base = (PREFERRED_TYPE)n. */
5719 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5720 build_int_cst (ntype, 1));
5721 base = fold_convert (pref_type, base);
5724 /* To avoid wrap, convert niter to the preferred type before adding 1. */
5725 else
5727 niter = fold_convert (pref_type, niter);
5728 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5729 build_int_cst (pref_type, 1));
5732 else
5733 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5734 build_int_cst (ntype, 1));
5735 return base;
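/* For illustration, assuming a 32-bit NITER of the form n - 1, a 64-bit
   PREFERRED_MODE and an ITERATIONS_MAX known to fit in 32 bits: niter + 1
   folds back to n and the result is simply n converted to the 64-bit
   preferred type, avoiding the "-1 +1" pair mentioned above.  */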
5738 /* Add one doloop dedicated IV candidate:
5739 - Base is (may_be_zero ? 1 : (niter + 1)).
5740 - Step is -1. */
5742 static void
5743 add_iv_candidate_for_doloop (struct ivopts_data *data)
5745 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5746 gcc_assert (niter_desc && niter_desc->assumptions);
5748 tree niter = niter_desc->niter;
5749 tree ntype = TREE_TYPE (niter);
5750 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5752 tree may_be_zero = niter_desc->may_be_zero;
5753 if (may_be_zero && integer_zerop (may_be_zero))
5754 may_be_zero = NULL_TREE;
5755 if (may_be_zero)
5757 if (COMPARISON_CLASS_P (may_be_zero))
5759 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5760 build_int_cst (ntype, 0),
5761 rewrite_to_non_trapping_overflow (niter));
5763 /* Don't try to obtain the iteration count expression when may_be_zero is
5764 integer_nonzerop (the count is then actually one) or any other non-comparison form. */
5765 else
5766 return;
5769 machine_mode mode = TYPE_MODE (ntype);
5770 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5772 tree base;
5773 if (mode != pref_mode)
5775 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5776 ntype = TREE_TYPE (base);
5778 else
5779 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5780 build_int_cst (ntype, 1));
5783 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5786 /* Finds the candidates for the induction variables. */
5788 static void
5789 find_iv_candidates (struct ivopts_data *data)
5791 /* Add commonly used ivs. */
5792 add_standard_iv_candidates (data);
5794 /* Add doloop dedicated ivs. */
5795 if (data->doloop_use_p)
5796 add_iv_candidate_for_doloop (data);
5798 /* Add old induction variables. */
5799 add_iv_candidate_for_bivs (data);
5801 /* Add induction variables derived from uses. */
5802 add_iv_candidate_for_groups (data);
5804 set_autoinc_for_original_candidates (data);
5806 /* Record the important candidates. */
5807 record_important_candidates (data);
5809 /* Relate compare iv_use with all candidates. */
5810 if (!data->consider_all_candidates)
5811 relate_compare_use_with_all_cands (data);
5813 if (dump_file && (dump_flags & TDF_DETAILS))
5815 unsigned i;
5817 fprintf (dump_file, "\n<Important Candidates>:\t");
5818 for (i = 0; i < data->vcands.length (); i++)
5819 if (data->vcands[i]->important)
5820 fprintf (dump_file, " %d,", data->vcands[i]->id);
5821 fprintf (dump_file, "\n");
5823 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5824 for (i = 0; i < data->vgroups.length (); i++)
5826 struct iv_group *group = data->vgroups[i];
5828 if (group->related_cands)
5830 fprintf (dump_file, " Group %d:\t", group->id);
5831 dump_bitmap (dump_file, group->related_cands);
5834 fprintf (dump_file, "\n");
5838 /* Determines costs of computing use of iv with an iv candidate. */
5840 static void
5841 determine_group_iv_costs (struct ivopts_data *data)
5843 unsigned i, j;
5844 struct iv_cand *cand;
5845 struct iv_group *group;
5846 bitmap to_clear = BITMAP_ALLOC (NULL);
5848 alloc_use_cost_map (data);
5850 for (i = 0; i < data->vgroups.length (); i++)
5852 group = data->vgroups[i];
5854 if (data->consider_all_candidates)
5856 for (j = 0; j < data->vcands.length (); j++)
5858 cand = data->vcands[j];
5859 determine_group_iv_cost (data, group, cand);
5862 else
5864 bitmap_iterator bi;
5866 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5868 cand = data->vcands[j];
5869 if (!determine_group_iv_cost (data, group, cand))
5870 bitmap_set_bit (to_clear, j);
5873 /* Remove the candidates for which the cost is infinite from
5874 the list of related candidates. */
5875 bitmap_and_compl_into (group->related_cands, to_clear);
5876 bitmap_clear (to_clear);
5880 BITMAP_FREE (to_clear);
5882 if (dump_file && (dump_flags & TDF_DETAILS))
5884 bitmap_iterator bi;
5886 /* Dump invariant variables. */
5887 fprintf (dump_file, "\n<Invariant Vars>:\n");
5888 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5890 struct version_info *info = ver_info (data, i);
5891 if (info->inv_id)
5893 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5894 print_generic_expr (dump_file, info->name, TDF_SLIM);
5895 fprintf (dump_file, "%s\n",
5896 info->has_nonlin_use ? "" : "\t(eliminable)");
5900 /* Dump invariant expressions. */
5901 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5902 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5904 for (hash_table<iv_inv_expr_hasher>::iterator it
5905 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5906 ++it)
5907 list.safe_push (*it);
5909 list.qsort (sort_iv_inv_expr_ent);
5911 for (i = 0; i < list.length (); ++i)
5913 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5914 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5915 fprintf (dump_file, "\n");
5918 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5920 for (i = 0; i < data->vgroups.length (); i++)
5922 group = data->vgroups[i];
5924 fprintf (dump_file, "Group %d:\n", i);
5925 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5926 for (j = 0; j < group->n_map_members; j++)
5928 if (!group->cost_map[j].cand
5929 || group->cost_map[j].cost.infinite_cost_p ())
5930 continue;
5932 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5933 group->cost_map[j].cand->id,
5934 group->cost_map[j].cost.cost,
5935 group->cost_map[j].cost.complexity);
5936 if (!group->cost_map[j].inv_exprs
5937 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5938 fprintf (dump_file, "NIL;\t");
5939 else
5940 bitmap_print (dump_file,
5941 group->cost_map[j].inv_exprs, "", ";\t");
5942 if (!group->cost_map[j].inv_vars
5943 || bitmap_empty_p (group->cost_map[j].inv_vars))
5944 fprintf (dump_file, "NIL;\n");
5945 else
5946 bitmap_print (dump_file,
5947 group->cost_map[j].inv_vars, "", "\n");
5950 fprintf (dump_file, "\n");
5952 fprintf (dump_file, "\n");
5956 /* Determines cost of the candidate CAND. */
5958 static void
5959 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5961 comp_cost cost_base;
5962 int64_t cost, cost_step;
5963 tree base;
5965 gcc_assert (cand->iv != NULL);
5967 /* There are two costs associated with the candidate -- its increment
5968 and its initialization. The second is almost negligible for any loop
5969 that rolls enough, so we give it only very little weight. */
5971 base = cand->iv->base;
5972 cost_base = force_var_cost (data, base, NULL);
5973 /* It would be exceptional for the iv register to happen to be initialized
5974 with the proper value at no cost. In general, there will at least be a
5975 regcopy or a const set. */
5976 if (cost_base.cost == 0)
5977 cost_base.cost = COSTS_N_INSNS (1);
5978 /* Doloop decrement should be considered as zero cost. */
5979 if (cand->doloop_p)
5980 cost_step = 0;
5981 else
5982 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5983 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5985 /* Prefer the original ivs unless we may gain something by replacing them.
5986 The reason is to make debugging simpler; this is therefore not relevant for
5987 artificial ivs created by other optimization passes. */
5988 if ((cand->pos != IP_ORIGINAL
5989 || !SSA_NAME_VAR (cand->var_before)
5990 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5991 /* Prefer doloop as well. */
5992 && !cand->doloop_p)
5993 cost++;
5995 /* Prefer not to insert statements into latch unless there are some
5996 already (so that we do not create unnecessary jumps). */
5997 if (cand->pos == IP_END
5998 && empty_block_p (ip_end_pos (data->current_loop)))
5999 cost++;
6001 cand->cost = cost;
6002 cand->cost_step = cost_step;
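/* Worked example with assumed numbers: suppose add_cost returns
   COSTS_N_INSNS (1) = 4 and the base needs one regcopy, so cost_base.cost
   is also 4.  For a loop averaging 10 iterations, adjust_setup_cost
   (roughly cost / avg_loop_niter when optimizing for speed) yields 0, so
   cost = 4 + 0 = 4.  The two cost++ adjustments above only bias the
   selection: they penalize candidates that are neither original nor doloop
   candidates, and candidates that would insert the increment into an
   otherwise empty latch; they do not model real instructions. */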
6005 /* Determines costs of computation of the candidates. */
6007 static void
6008 determine_iv_costs (struct ivopts_data *data)
6010 unsigned i;
6012 if (dump_file && (dump_flags & TDF_DETAILS))
6014 fprintf (dump_file, "<Candidate Costs>:\n");
6015 fprintf (dump_file, " cand\tcost\n");
6018 for (i = 0; i < data->vcands.length (); i++)
6020 struct iv_cand *cand = data->vcands[i];
6022 determine_iv_cost (data, cand);
6024 if (dump_file && (dump_flags & TDF_DETAILS))
6025 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6028 if (dump_file && (dump_flags & TDF_DETAILS))
6029 fprintf (dump_file, "\n");
6032 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6033 induction variables. Note N_INVS includes both invariant variables and
6034 invariant expressions. */
6036 static unsigned
6037 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6038 unsigned n_cands)
6040 unsigned cost;
6041 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6042 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6043 bool speed = data->speed;
6045 /* If there is a call in the loop body, the call-clobbered registers
6046 are not available for loop invariants. */
6047 if (data->body_includes_call)
6048 available_regs = available_regs - target_clobbered_regs;
6050 /* If we have enough registers. */
6051 if (regs_needed + target_res_regs < available_regs)
6052 cost = n_new;
6053 /* If close to running out of registers, try to preserve them. */
6054 else if (regs_needed <= available_regs)
6055 cost = target_reg_cost [speed] * regs_needed;
6056 /* If the registers needed exceed the available ones but the number of
6057 candidates does not, we penalize the extra registers using target_spill_cost. */
6058 else if (n_cands <= available_regs)
6059 cost = target_reg_cost [speed] * available_regs
6060 + target_spill_cost [speed] * (regs_needed - available_regs);
6061 /* If even the number of candidates exceeds the available registers, we
6062 penalize the extra candidate registers using target_spill_cost * 2,
6063 because it is more expensive to spill an induction variable than an
invariant. */
6064 else
6065 cost = target_reg_cost [speed] * available_regs
6066 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6067 + target_spill_cost [speed] * (regs_needed - n_cands);
6069 /* Finally, add the number of candidates, so that we prefer eliminating
6070 induction variables if possible. */
6071 return cost + n_cands;
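/* Worked example with assumed numbers (no particular target): with
   available_regs = 14, target_res_regs = 3, n_old = 6, n_cands = 4 and
   n_invs = 2 we get regs_needed = 12.  Since 12 + 3 >= 14 but 12 <= 14, the
   second branch applies and cost = target_reg_cost[speed] * 12; the final
   result adds n_cands, i.e. cost + 4, so that smaller candidate sets are
   preferred when register pressure is otherwise equal. */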
6074 /* For each size of the induction variable set determine the penalty. */
6076 static void
6077 determine_set_costs (struct ivopts_data *data)
6079 unsigned j, n;
6080 gphi *phi;
6081 gphi_iterator psi;
6082 tree op;
6083 class loop *loop = data->current_loop;
6084 bitmap_iterator bi;
6086 if (dump_file && (dump_flags & TDF_DETAILS))
6088 fprintf (dump_file, "<Global Costs>:\n");
6089 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6090 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6091 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6092 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6095 n = 0;
6096 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6098 phi = psi.phi ();
6099 op = PHI_RESULT (phi);
6101 if (virtual_operand_p (op))
6102 continue;
6104 if (get_iv (data, op))
6105 continue;
6107 if (!POINTER_TYPE_P (TREE_TYPE (op))
6108 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6109 continue;
6111 n++;
6114 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6116 struct version_info *info = ver_info (data, j);
6118 if (info->inv_id && info->has_nonlin_use)
6119 n++;
6122 data->regs_used = n;
6123 if (dump_file && (dump_flags & TDF_DETAILS))
6124 fprintf (dump_file, " regs_used %d\n", n);
6126 if (dump_file && (dump_flags & TDF_DETAILS))
6128 fprintf (dump_file, " cost for size:\n");
6129 fprintf (dump_file, " ivs\tcost\n");
6130 for (j = 0; j <= 2 * target_avail_regs; j++)
6131 fprintf (dump_file, " %d\t%d\n", j,
6132 ivopts_estimate_reg_pressure (data, 0, j));
6133 fprintf (dump_file, "\n");
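/* For illustration (assumed loop shape): a loop whose header has a reduction
   PHI (say a running maximum) that is not an affine iv, and whose body uses
   one loop-invariant SSA name in a non-linear expression, would get
   regs_used = 2 here; this value seeds n_old in ivopts_estimate_reg_pressure
   above. */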
6137 /* Returns true if A is a cheaper cost pair than B. */
6139 static bool
6140 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6142 if (!a)
6143 return false;
6145 if (!b)
6146 return true;
6148 if (a->cost < b->cost)
6149 return true;
6151 if (b->cost < a->cost)
6152 return false;
6154 /* In case the costs are the same, prefer the cheaper candidate. */
6155 if (a->cand->cost < b->cand->cost)
6156 return true;
6158 return false;
6161 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6162 for more expensive, equal and cheaper respectively. */
6164 static int
6165 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6167 if (cheaper_cost_pair (a, b))
6168 return -1;
6169 if (cheaper_cost_pair (b, a))
6170 return 1;
6172 return 0;
6175 /* Returns the candidate by which USE is expressed in IVS. */
6177 static class cost_pair *
6178 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6180 return ivs->cand_for_group[group->id];
6183 /* Computes the cost field of IVS structure. */
6185 static void
6186 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6188 comp_cost cost = ivs->cand_use_cost;
6190 cost += ivs->cand_cost;
6191 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6192 ivs->cost = cost;
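/* In other words, the cost of a candidate assignment is

     cost = cand_use_cost + cand_cost
	    + ivopts_estimate_reg_pressure (data, n_invs, n_cands)

   i.e. the summed per-group use costs, plus the per-candidate increment
   costs, plus the estimated register pressure penalty. */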
6195 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6196 and IVS. */
6198 static void
6199 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6201 bitmap_iterator bi;
6202 unsigned iid;
6204 if (!invs)
6205 return;
6207 gcc_assert (n_inv_uses != NULL);
6208 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6210 n_inv_uses[iid]--;
6211 if (n_inv_uses[iid] == 0)
6212 ivs->n_invs--;
6216 /* Set USE not to be expressed by any candidate in IVS. */
6218 static void
6219 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6220 struct iv_group *group)
6222 unsigned gid = group->id, cid;
6223 class cost_pair *cp;
6225 cp = ivs->cand_for_group[gid];
6226 if (!cp)
6227 return;
6228 cid = cp->cand->id;
6230 ivs->bad_groups++;
6231 ivs->cand_for_group[gid] = NULL;
6232 ivs->n_cand_uses[cid]--;
6234 if (ivs->n_cand_uses[cid] == 0)
6236 bitmap_clear_bit (ivs->cands, cid);
6237 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6238 ivs->n_cands--;
6239 ivs->cand_cost -= cp->cand->cost;
6240 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6241 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6244 ivs->cand_use_cost -= cp->cost;
6245 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6246 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6247 iv_ca_recount_cost (data, ivs);
6250 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6251 IVS. */
6253 static void
6254 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6256 bitmap_iterator bi;
6257 unsigned iid;
6259 if (!invs)
6260 return;
6262 gcc_assert (n_inv_uses != NULL);
6263 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6265 n_inv_uses[iid]++;
6266 if (n_inv_uses[iid] == 1)
6267 ivs->n_invs++;
6271 /* Set cost pair for GROUP in set IVS to CP. */
6273 static void
6274 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6275 struct iv_group *group, class cost_pair *cp)
6277 unsigned gid = group->id, cid;
6279 if (ivs->cand_for_group[gid] == cp)
6280 return;
6282 if (ivs->cand_for_group[gid])
6283 iv_ca_set_no_cp (data, ivs, group);
6285 if (cp)
6287 cid = cp->cand->id;
6289 ivs->bad_groups--;
6290 ivs->cand_for_group[gid] = cp;
6291 ivs->n_cand_uses[cid]++;
6292 if (ivs->n_cand_uses[cid] == 1)
6294 bitmap_set_bit (ivs->cands, cid);
6295 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6296 ivs->n_cands++;
6297 ivs->cand_cost += cp->cand->cost;
6298 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6299 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6302 ivs->cand_use_cost += cp->cost;
6303 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6304 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6305 iv_ca_recount_cost (data, ivs);
6309 /* Extend set IVS by expressing USE by some of the candidates in it
6310 if possible. Consider all important candidates if candidates in
6311 set IVS don't give any result. */
6313 static void
6314 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6315 struct iv_group *group)
6317 class cost_pair *best_cp = NULL, *cp;
6318 bitmap_iterator bi;
6319 unsigned i;
6320 struct iv_cand *cand;
6322 gcc_assert (ivs->upto >= group->id);
6323 ivs->upto++;
6324 ivs->bad_groups++;
6326 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6328 cand = data->vcands[i];
6329 cp = get_group_iv_cost (data, group, cand);
6330 if (cheaper_cost_pair (cp, best_cp))
6331 best_cp = cp;
6334 if (best_cp == NULL)
6336 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6338 cand = data->vcands[i];
6339 cp = get_group_iv_cost (data, group, cand);
6340 if (cheaper_cost_pair (cp, best_cp))
6341 best_cp = cp;
6345 iv_ca_set_cp (data, ivs, group, best_cp);
6348 /* Get cost for assignment IVS. */
6350 static comp_cost
6351 iv_ca_cost (class iv_ca *ivs)
6353 /* This was a conditional expression but it triggered a bug in
6354 Sun C 5.5. */
6355 if (ivs->bad_groups)
6356 return infinite_cost;
6357 else
6358 return ivs->cost;
6361 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6362 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6363 respectively. */
6365 static int
6366 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6367 struct iv_group *group, class cost_pair *old_cp,
6368 class cost_pair *new_cp)
6370 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6371 unsigned old_n_invs = ivs->n_invs;
6372 iv_ca_set_cp (data, ivs, group, new_cp);
6373 unsigned new_n_invs = ivs->n_invs;
6374 iv_ca_set_cp (data, ivs, group, old_cp);
6376 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6379 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6380 it before NEXT. */
6382 static struct iv_ca_delta *
6383 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6384 class cost_pair *new_cp, struct iv_ca_delta *next)
6386 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6388 change->group = group;
6389 change->old_cp = old_cp;
6390 change->new_cp = new_cp;
6391 change->next = next;
6393 return change;
6396 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6397 are rewritten. */
6399 static struct iv_ca_delta *
6400 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6402 struct iv_ca_delta *last;
6404 if (!l2)
6405 return l1;
6407 if (!l1)
6408 return l2;
6410 for (last = l1; last->next; last = last->next)
6411 continue;
6412 last->next = l2;
6414 return l1;
6417 /* Reverse the list of changes DELTA, forming the inverse to it. */
6419 static struct iv_ca_delta *
6420 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6422 struct iv_ca_delta *act, *next, *prev = NULL;
6424 for (act = delta; act; act = next)
6426 next = act->next;
6427 act->next = prev;
6428 prev = act;
6430 std::swap (act->old_cp, act->new_cp);
6433 return prev;
6436 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6437 reverted instead. */
6439 static void
6440 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6441 struct iv_ca_delta *delta, bool forward)
6443 class cost_pair *from, *to;
6444 struct iv_ca_delta *act;
6446 if (!forward)
6447 delta = iv_ca_delta_reverse (delta);
6449 for (act = delta; act; act = act->next)
6451 from = act->old_cp;
6452 to = act->new_cp;
6453 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6454 iv_ca_set_cp (data, ivs, act->group, to);
6457 if (!forward)
6458 iv_ca_delta_reverse (delta);
6461 /* Returns true if CAND is used in IVS. */
6463 static bool
6464 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6466 return ivs->n_cand_uses[cand->id] > 0;
6469 /* Returns number of induction variable candidates in the set IVS. */
6471 static unsigned
6472 iv_ca_n_cands (class iv_ca *ivs)
6474 return ivs->n_cands;
6477 /* Free the list of changes DELTA. */
6479 static void
6480 iv_ca_delta_free (struct iv_ca_delta **delta)
6482 struct iv_ca_delta *act, *next;
6484 for (act = *delta; act; act = next)
6486 next = act->next;
6487 free (act);
6490 *delta = NULL;
6493 /* Allocates new iv candidates assignment. */
6495 static class iv_ca *
6496 iv_ca_new (struct ivopts_data *data)
6498 class iv_ca *nw = XNEW (class iv_ca);
6500 nw->upto = 0;
6501 nw->bad_groups = 0;
6502 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6503 data->vgroups.length ());
6504 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6505 nw->cands = BITMAP_ALLOC (NULL);
6506 nw->n_cands = 0;
6507 nw->n_invs = 0;
6508 nw->cand_use_cost = no_cost;
6509 nw->cand_cost = 0;
6510 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6511 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6512 nw->cost = no_cost;
6514 return nw;
6517 /* Free memory occupied by the set IVS. */
6519 static void
6520 iv_ca_free (class iv_ca **ivs)
6522 free ((*ivs)->cand_for_group);
6523 free ((*ivs)->n_cand_uses);
6524 BITMAP_FREE ((*ivs)->cands);
6525 free ((*ivs)->n_inv_var_uses);
6526 free ((*ivs)->n_inv_expr_uses);
6527 free (*ivs);
6528 *ivs = NULL;
6531 /* Dumps IVS to FILE. */
6533 static void
6534 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6536 unsigned i;
6537 comp_cost cost = iv_ca_cost (ivs);
6539 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6540 cost.complexity);
6541 fprintf (file, " reg_cost: %d\n",
6542 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6543 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6544 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6545 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6546 bitmap_print (file, ivs->cands, " candidates: ","\n");
6548 for (i = 0; i < ivs->upto; i++)
6550 struct iv_group *group = data->vgroups[i];
6551 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6552 if (cp)
6553 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6554 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6555 cp->cost.cost, cp->cost.complexity);
6556 else
6557 fprintf (file, " group:%d --> ??\n", group->id);
6560 const char *pref = "";
6561 fprintf (file, " invariant variables: ");
6562 for (i = 1; i <= data->max_inv_var_id; i++)
6563 if (ivs->n_inv_var_uses[i])
6565 fprintf (file, "%s%d", pref, i);
6566 pref = ", ";
6569 pref = "";
6570 fprintf (file, "\n invariant expressions: ");
6571 for (i = 1; i <= data->max_inv_expr_id; i++)
6572 if (ivs->n_inv_expr_uses[i])
6574 fprintf (file, "%s%d", pref, i);
6575 pref = ", ";
6578 fprintf (file, "\n\n");
6581 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6582 new set, and store differences in DELTA. Number of induction variables
6583 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true,
6584 the function will try to find a solution with a minimal number of iv
candidates. */
6586 static comp_cost
6587 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6588 struct iv_cand *cand, struct iv_ca_delta **delta,
6589 unsigned *n_ivs, bool min_ncand)
6591 unsigned i;
6592 comp_cost cost;
6593 struct iv_group *group;
6594 class cost_pair *old_cp, *new_cp;
6596 *delta = NULL;
6597 for (i = 0; i < ivs->upto; i++)
6599 group = data->vgroups[i];
6600 old_cp = iv_ca_cand_for_group (ivs, group);
6602 if (old_cp
6603 && old_cp->cand == cand)
6604 continue;
6606 new_cp = get_group_iv_cost (data, group, cand);
6607 if (!new_cp)
6608 continue;
6610 if (!min_ncand)
6612 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6613 /* Skip if new_cp depends on more invariants. */
6614 if (cmp_invs > 0)
6615 continue;
6617 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6618 /* Skip if new_cp is not cheaper. */
6619 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6620 continue;
6623 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6626 iv_ca_delta_commit (data, ivs, *delta, true);
6627 cost = iv_ca_cost (ivs);
6628 if (n_ivs)
6629 *n_ivs = iv_ca_n_cands (ivs);
6630 iv_ca_delta_commit (data, ivs, *delta, false);
6632 return cost;
6635 /* Try narrowing set IVS by removing CAND. Return the cost of
6636 the new set and store the differences in DELTA. START is
6637 the candidate with which we start narrowing. */
6639 static comp_cost
6640 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6641 struct iv_cand *cand, struct iv_cand *start,
6642 struct iv_ca_delta **delta)
6644 unsigned i, ci;
6645 struct iv_group *group;
6646 class cost_pair *old_cp, *new_cp, *cp;
6647 bitmap_iterator bi;
6648 struct iv_cand *cnd;
6649 comp_cost cost, best_cost, acost;
6651 *delta = NULL;
6652 for (i = 0; i < data->vgroups.length (); i++)
6654 group = data->vgroups[i];
6656 old_cp = iv_ca_cand_for_group (ivs, group);
6657 if (old_cp->cand != cand)
6658 continue;
6660 best_cost = iv_ca_cost (ivs);
6661 /* Start narrowing with START. */
6662 new_cp = get_group_iv_cost (data, group, start);
6664 if (data->consider_all_candidates)
6666 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6668 if (ci == cand->id || (start && ci == start->id))
6669 continue;
6671 cnd = data->vcands[ci];
6673 cp = get_group_iv_cost (data, group, cnd);
6674 if (!cp)
6675 continue;
6677 iv_ca_set_cp (data, ivs, group, cp);
6678 acost = iv_ca_cost (ivs);
6680 if (acost < best_cost)
6682 best_cost = acost;
6683 new_cp = cp;
6687 else
6689 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6691 if (ci == cand->id || (start && ci == start->id))
6692 continue;
6694 cnd = data->vcands[ci];
6696 cp = get_group_iv_cost (data, group, cnd);
6697 if (!cp)
6698 continue;
6700 iv_ca_set_cp (data, ivs, group, cp);
6701 acost = iv_ca_cost (ivs);
6703 if (acost < best_cost)
6705 best_cost = acost;
6706 new_cp = cp;
6710 /* Restore to old cp for use. */
6711 iv_ca_set_cp (data, ivs, group, old_cp);
6713 if (!new_cp)
6715 iv_ca_delta_free (delta);
6716 return infinite_cost;
6719 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6722 iv_ca_delta_commit (data, ivs, *delta, true);
6723 cost = iv_ca_cost (ivs);
6724 iv_ca_delta_commit (data, ivs, *delta, false);
6726 return cost;
6729 /* Try optimizing the set of candidates IVS by removing candidates different
6730 from EXCEPT_CAND from it. Return cost of the new set, and store
6731 differences in DELTA. */
6733 static comp_cost
6734 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6735 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6737 bitmap_iterator bi;
6738 struct iv_ca_delta *act_delta, *best_delta;
6739 unsigned i;
6740 comp_cost best_cost, acost;
6741 struct iv_cand *cand;
6743 best_delta = NULL;
6744 best_cost = iv_ca_cost (ivs);
6746 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6748 cand = data->vcands[i];
6750 if (cand == except_cand)
6751 continue;
6753 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6755 if (acost < best_cost)
6757 best_cost = acost;
6758 iv_ca_delta_free (&best_delta);
6759 best_delta = act_delta;
6761 else
6762 iv_ca_delta_free (&act_delta);
6765 if (!best_delta)
6767 *delta = NULL;
6768 return best_cost;
6771 /* Recurse to possibly remove other unnecessary ivs. */
6772 iv_ca_delta_commit (data, ivs, best_delta, true);
6773 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6774 iv_ca_delta_commit (data, ivs, best_delta, false);
6775 *delta = iv_ca_delta_join (best_delta, *delta);
6776 return best_cost;
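/* For illustration: if removing candidate A alone does not lower the cost,
   iv_ca_narrow for A simply fails to beat best_cost; but if removing A is a
   win and candidate B then also becomes removable, the recursive call above
   (made with A's removal committed) drops B as well, and the two deltas are
   joined so the caller sees both removals at once. */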
6779 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6780 cheaper local cost for GROUP than BEST_CP. Return pointer to
6781 the corresponding cost_pair, otherwise just return BEST_CP. */
6783 static class cost_pair*
6784 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6785 unsigned int cand_idx, struct iv_cand *old_cand,
6786 class cost_pair *best_cp)
6788 struct iv_cand *cand;
6789 class cost_pair *cp;
6791 gcc_assert (old_cand != NULL && best_cp != NULL);
6792 if (cand_idx == old_cand->id)
6793 return best_cp;
6795 cand = data->vcands[cand_idx];
6796 cp = get_group_iv_cost (data, group, cand);
6797 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6798 return cp;
6800 return best_cp;
6803 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6804 which are used by more than one iv use. For each of those candidates,
6805 this function tries to represent iv uses under that candidate using
6806 other ones with lower local cost, then tries to prune the new set.
6807 If the new set has lower cost, it returns the new cost after recording
6808 candidate replacement in list DELTA. */
6810 static comp_cost
6811 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6812 struct iv_ca_delta **delta)
6814 bitmap_iterator bi, bj;
6815 unsigned int i, j, k;
6816 struct iv_cand *cand;
6817 comp_cost orig_cost, acost;
6818 struct iv_ca_delta *act_delta, *tmp_delta;
6819 class cost_pair *old_cp, *best_cp = NULL;
6821 *delta = NULL;
6822 orig_cost = iv_ca_cost (ivs);
6824 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6826 if (ivs->n_cand_uses[i] == 1
6827 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6828 continue;
6830 cand = data->vcands[i];
6832 act_delta = NULL;
6833 /* Represent uses under current candidate using other ones with
6834 lower local cost. */
6835 for (j = 0; j < ivs->upto; j++)
6837 struct iv_group *group = data->vgroups[j];
6838 old_cp = iv_ca_cand_for_group (ivs, group);
6840 if (old_cp->cand != cand)
6841 continue;
6843 best_cp = old_cp;
6844 if (data->consider_all_candidates)
6845 for (k = 0; k < data->vcands.length (); k++)
6846 best_cp = cheaper_cost_with_cand (data, group, k,
6847 old_cp->cand, best_cp);
6848 else
6849 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6850 best_cp = cheaper_cost_with_cand (data, group, k,
6851 old_cp->cand, best_cp);
6853 if (best_cp == old_cp)
6854 continue;
6856 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6858 /* No need for further prune. */
6859 if (!act_delta)
6860 continue;
6862 /* Prune the new candidate set. */
6863 iv_ca_delta_commit (data, ivs, act_delta, true);
6864 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6865 iv_ca_delta_commit (data, ivs, act_delta, false);
6866 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6868 if (acost < orig_cost)
6870 *delta = act_delta;
6871 return acost;
6873 else
6874 iv_ca_delta_free (&act_delta);
6877 return orig_cost;
6880 /* Tries to extend the set IVS in the best possible way in order to
6881 express the GROUP. If ORIGINALP is true, prefer candidates from
6882 the original set of IVs, otherwise favor important candidates not
6883 based on any memory object. */
6885 static bool
6886 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6887 struct iv_group *group, bool originalp)
6889 comp_cost best_cost, act_cost;
6890 unsigned i;
6891 bitmap_iterator bi;
6892 struct iv_cand *cand;
6893 struct iv_ca_delta *best_delta = NULL, *act_delta;
6894 class cost_pair *cp;
6896 iv_ca_add_group (data, ivs, group);
6897 best_cost = iv_ca_cost (ivs);
6898 cp = iv_ca_cand_for_group (ivs, group);
6899 if (cp)
6901 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6902 iv_ca_set_no_cp (data, ivs, group);
6905 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6906 first try important candidates not based on any memory object. Only if
6907 this fails, try the specific ones. Rationale -- in loops with many
6908 variables the best choice often is to use just one generic biv. If we
6909 added here many ivs specific to the uses, the optimization algorithm later
6910 would be likely to get stuck in a local minimum, thus causing us to create
6911 too many ivs. The approach from few ivs to more seems more likely to be
6912 successful -- starting from few ivs, replacing an expensive use by a
6913 specific iv should always be a win. */
6914 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6916 cand = data->vcands[i];
6918 if (originalp && cand->pos != IP_ORIGINAL)
6919 continue;
6921 if (!originalp && cand->iv->base_object != NULL_TREE)
6922 continue;
6924 if (iv_ca_cand_used_p (ivs, cand))
6925 continue;
6927 cp = get_group_iv_cost (data, group, cand);
6928 if (!cp)
6929 continue;
6931 iv_ca_set_cp (data, ivs, group, cp);
6932 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6933 true);
6934 iv_ca_set_no_cp (data, ivs, group);
6935 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6937 if (act_cost < best_cost)
6939 best_cost = act_cost;
6941 iv_ca_delta_free (&best_delta);
6942 best_delta = act_delta;
6944 else
6945 iv_ca_delta_free (&act_delta);
6948 if (best_cost.infinite_cost_p ())
6950 for (i = 0; i < group->n_map_members; i++)
6952 cp = group->cost_map + i;
6953 cand = cp->cand;
6954 if (!cand)
6955 continue;
6957 /* Already tried this. */
6958 if (cand->important)
6960 if (originalp && cand->pos == IP_ORIGINAL)
6961 continue;
6962 if (!originalp && cand->iv->base_object == NULL_TREE)
6963 continue;
6966 if (iv_ca_cand_used_p (ivs, cand))
6967 continue;
6969 act_delta = NULL;
6970 iv_ca_set_cp (data, ivs, group, cp);
6971 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6972 iv_ca_set_no_cp (data, ivs, group);
6973 act_delta = iv_ca_delta_add (group,
6974 iv_ca_cand_for_group (ivs, group),
6975 cp, act_delta);
6977 if (act_cost < best_cost)
6979 best_cost = act_cost;
6981 if (best_delta)
6982 iv_ca_delta_free (&best_delta);
6983 best_delta = act_delta;
6985 else
6986 iv_ca_delta_free (&act_delta);
6990 iv_ca_delta_commit (data, ivs, best_delta, true);
6991 iv_ca_delta_free (&best_delta);
6993 return !best_cost.infinite_cost_p ();
6996 /* Finds an initial assignment of candidates to uses. */
6998 static class iv_ca *
6999 get_initial_solution (struct ivopts_data *data, bool originalp)
7001 unsigned i;
7002 class iv_ca *ivs = iv_ca_new (data);
7004 for (i = 0; i < data->vgroups.length (); i++)
7005 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7007 iv_ca_free (&ivs);
7008 return NULL;
7011 return ivs;
7014 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7015 points to a bool variable; if it is true, this function tries to break
7016 the locally optimal fixed point by replacing candidates in IVS. */
7018 static bool
7019 try_improve_iv_set (struct ivopts_data *data,
7020 class iv_ca *ivs, bool *try_replace_p)
7022 unsigned i, n_ivs;
7023 comp_cost acost, best_cost = iv_ca_cost (ivs);
7024 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7025 struct iv_cand *cand;
7027 /* Try extending the set of induction variables by one. */
7028 for (i = 0; i < data->vcands.length (); i++)
7030 cand = data->vcands[i];
7032 if (iv_ca_cand_used_p (ivs, cand))
7033 continue;
7035 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7036 if (!act_delta)
7037 continue;
7039 /* If we successfully added the candidate and the set is small enough,
7040 try optimizing it by removing other candidates. */
7041 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7043 iv_ca_delta_commit (data, ivs, act_delta, true);
7044 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7045 iv_ca_delta_commit (data, ivs, act_delta, false);
7046 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7049 if (acost < best_cost)
7051 best_cost = acost;
7052 iv_ca_delta_free (&best_delta);
7053 best_delta = act_delta;
7055 else
7056 iv_ca_delta_free (&act_delta);
7059 if (!best_delta)
7061 /* Try removing the candidates from the set instead. */
7062 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7064 if (!best_delta && *try_replace_p)
7066 *try_replace_p = false;
7067 /* So far the candidate selection algorithm tends to choose few IVs,
7068 so that it can handle cases in which loops have many variables
7069 but the best choice is often to use only one general biv. One
7070 weakness is that it can't handle the opposite case, in which different
7071 candidates should be chosen with respect to each use. To solve
7072 this problem, we replace candidates in the manner described in the
7073 comments of iv_ca_replace, thus giving the general algorithm a chance
7074 to break the locally optimal fixed point in these cases. */
7075 best_cost = iv_ca_replace (data, ivs, &best_delta);
7078 if (!best_delta)
7079 return false;
7082 iv_ca_delta_commit (data, ivs, best_delta, true);
7083 iv_ca_delta_free (&best_delta);
7084 return best_cost == iv_ca_cost (ivs);
7087 /* Attempts to find the optimal set of induction variables. We use a simple
7088 greedy heuristic -- we try to replace at most one candidate in the selected
7089 solution and remove the unused ivs while this improves the cost. */
7091 static class iv_ca *
7092 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7094 class iv_ca *set;
7095 bool try_replace_p = true;
7097 /* Get the initial solution. */
7098 set = get_initial_solution (data, originalp);
7099 if (!set)
7101 if (dump_file && (dump_flags & TDF_DETAILS))
7102 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7103 return NULL;
7106 if (dump_file && (dump_flags & TDF_DETAILS))
7108 fprintf (dump_file, "Initial set of candidates:\n");
7109 iv_ca_dump (data, dump_file, set);
7112 while (try_improve_iv_set (data, set, &try_replace_p))
7114 if (dump_file && (dump_flags & TDF_DETAILS))
7116 fprintf (dump_file, "Improved to:\n");
7117 iv_ca_dump (data, dump_file, set);
7121 /* If the set has infinite_cost, it can't be optimal. */
7122 if (iv_ca_cost (set).infinite_cost_p ())
7124 if (dump_file && (dump_flags & TDF_DETAILS))
7125 fprintf (dump_file,
7126 "Overflow to infinite cost in try_improve_iv_set.\n");
7127 iv_ca_free (&set);
7129 return set;
7132 static class iv_ca *
7133 find_optimal_iv_set (struct ivopts_data *data)
7135 unsigned i;
7136 comp_cost cost, origcost;
7137 class iv_ca *set, *origset;
7139 /* Determine the cost based on a strategy that starts with original IVs,
7140 and try again using a strategy that prefers candidates not based
7141 on any IVs. */
7142 origset = find_optimal_iv_set_1 (data, true);
7143 set = find_optimal_iv_set_1 (data, false);
7145 if (!origset && !set)
7146 return NULL;
7148 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7149 cost = set ? iv_ca_cost (set) : infinite_cost;
7151 if (dump_file && (dump_flags & TDF_DETAILS))
7153 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7154 origcost.cost, origcost.complexity);
7155 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7156 cost.cost, cost.complexity);
7159 /* Choose the one with the best cost. */
7160 if (origcost <= cost)
7162 if (set)
7163 iv_ca_free (&set);
7164 set = origset;
7166 else if (origset)
7167 iv_ca_free (&origset);
7169 for (i = 0; i < data->vgroups.length (); i++)
7171 struct iv_group *group = data->vgroups[i];
7172 group->selected = iv_ca_cand_for_group (set, group)->cand;
7175 return set;
7178 /* Creates a new induction variable corresponding to CAND. */
7180 static void
7181 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7183 gimple_stmt_iterator incr_pos;
7184 tree base;
7185 struct iv_use *use;
7186 struct iv_group *group;
7187 bool after = false;
7189 gcc_assert (cand->iv != NULL);
7191 switch (cand->pos)
7193 case IP_NORMAL:
7194 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7195 break;
7197 case IP_END:
7198 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7199 after = true;
7200 break;
7202 case IP_AFTER_USE:
7203 after = true;
7204 /* fall through */
7205 case IP_BEFORE_USE:
7206 incr_pos = gsi_for_stmt (cand->incremented_at);
7207 break;
7209 case IP_ORIGINAL:
7210 /* Mark that the iv is preserved. */
7211 name_info (data, cand->var_before)->preserve_biv = true;
7212 name_info (data, cand->var_after)->preserve_biv = true;
7214 /* Rewrite the increment so that it uses var_before directly. */
7215 use = find_interesting_uses_op (data, cand->var_after);
7216 group = data->vgroups[use->group_id];
7217 group->selected = cand;
7218 return;
7221 gimple_add_tmp_var (cand->var_before);
7223 base = unshare_expr (cand->iv->base);
7225 create_iv (base, unshare_expr (cand->iv->step),
7226 cand->var_before, data->current_loop,
7227 &incr_pos, after, &cand->var_before, &cand->var_after);
7230 /* Creates new induction variables described in SET. */
7232 static void
7233 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7235 unsigned i;
7236 struct iv_cand *cand;
7237 bitmap_iterator bi;
7239 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7241 cand = data->vcands[i];
7242 create_new_iv (data, cand);
7245 if (dump_file && (dump_flags & TDF_DETAILS))
7247 fprintf (dump_file, "Selected IV set for loop %d",
7248 data->current_loop->num);
7249 if (data->loop_loc != UNKNOWN_LOCATION)
7250 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7251 LOCATION_LINE (data->loop_loc));
7252 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7253 avg_loop_niter (data->current_loop));
7254 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7255 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7257 cand = data->vcands[i];
7258 dump_cand (dump_file, cand);
7260 fprintf (dump_file, "\n");
7264 /* Rewrites USE (definition of iv used in a nonlinear expression)
7265 using candidate CAND. */
7267 static void
7268 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7269 struct iv_use *use, struct iv_cand *cand)
7271 gassign *ass;
7272 gimple_stmt_iterator bsi;
7273 tree comp, type = get_use_type (use), tgt;
7275 /* An important special case -- if we are asked to express value of
7276 the original iv by itself, just exit; there is no need to
7277 introduce a new computation (that might also need casting the
7278 variable to unsigned and back). */
7279 if (cand->pos == IP_ORIGINAL
7280 && cand->incremented_at == use->stmt)
7282 tree op = NULL_TREE;
7283 enum tree_code stmt_code;
7285 gcc_assert (is_gimple_assign (use->stmt));
7286 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7288 /* Check whether we may leave the computation unchanged.
7289 This is the case only if it does not rely on other
7290 computations in the loop -- otherwise, the computation
7291 we rely upon may be removed in remove_unused_ivs,
7292 thus leading to ICE. */
7293 stmt_code = gimple_assign_rhs_code (use->stmt);
7294 if (stmt_code == PLUS_EXPR
7295 || stmt_code == MINUS_EXPR
7296 || stmt_code == POINTER_PLUS_EXPR)
7298 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7299 op = gimple_assign_rhs2 (use->stmt);
7300 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7301 op = gimple_assign_rhs1 (use->stmt);
7304 if (op != NULL_TREE)
7306 if (expr_invariant_in_loop_p (data->current_loop, op))
7307 return;
7308 if (TREE_CODE (op) == SSA_NAME)
7310 struct iv *iv = get_iv (data, op);
7311 if (iv != NULL && integer_zerop (iv->step))
7312 return;
7317 switch (gimple_code (use->stmt))
7319 case GIMPLE_PHI:
7320 tgt = PHI_RESULT (use->stmt);
7322 /* If we should keep the biv, do not replace it. */
7323 if (name_info (data, tgt)->preserve_biv)
7324 return;
7326 bsi = gsi_after_labels (gimple_bb (use->stmt));
7327 break;
7329 case GIMPLE_ASSIGN:
7330 tgt = gimple_assign_lhs (use->stmt);
7331 bsi = gsi_for_stmt (use->stmt);
7332 break;
7334 default:
7335 gcc_unreachable ();
7338 aff_tree aff_inv, aff_var;
7339 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7340 use, cand, &aff_inv, &aff_var))
7341 gcc_unreachable ();
7343 unshare_aff_combination (&aff_inv);
7344 unshare_aff_combination (&aff_var);
7345 /* Prefer CSE opportunities over hoisting the loop invariant by adding the
7346 offset last, so that iv_uses with different offsets can be CSEed. */
7347 poly_widest_int offset = aff_inv.offset;
7348 aff_inv.offset = 0;
7350 gimple_seq stmt_list = NULL, seq = NULL;
7351 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7352 tree comp_op2 = aff_combination_to_tree (&aff_var);
7353 gcc_assert (comp_op1 && comp_op2);
7355 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7356 gimple_seq_add_seq (&stmt_list, seq);
7357 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7358 gimple_seq_add_seq (&stmt_list, seq);
7360 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7361 std::swap (comp_op1, comp_op2);
7363 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7365 comp = fold_build_pointer_plus (comp_op1,
7366 fold_convert (sizetype, comp_op2));
7367 comp = fold_build_pointer_plus (comp,
7368 wide_int_to_tree (sizetype, offset));
7370 else
7372 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7373 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7374 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7375 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7378 comp = fold_convert (type, comp);
7379 comp = force_gimple_operand (comp, &seq, false, NULL);
7380 gimple_seq_add_seq (&stmt_list, seq);
7381 if (gimple_code (use->stmt) != GIMPLE_PHI
7382 /* We can't allow re-allocating the stmt as it might be pointed
7383 to still. */
7384 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7385 >= gimple_num_ops (gsi_stmt (bsi))))
7387 comp = force_gimple_operand (comp, &seq, true, NULL);
7388 gimple_seq_add_seq (&stmt_list, seq);
7389 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7391 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7392 /* As this isn't a plain copy we have to reset alignment
7393 information. */
7394 if (SSA_NAME_PTR_INFO (comp))
7395 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7399 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7400 if (gimple_code (use->stmt) == GIMPLE_PHI)
7402 ass = gimple_build_assign (tgt, comp);
7403 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7405 bsi = gsi_for_stmt (use->stmt);
7406 remove_phi_node (&bsi, false);
7408 else
7410 gimple_assign_set_rhs_from_tree (&bsi, comp);
7411 use->stmt = gsi_stmt (bsi);
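/* Illustrative example of the "offset last" ordering above (assumed code):
   two uses that compute p + i*4 and p + i*4 + 4 against the same candidate
   both materialize the common part p + i*4 first and only then add their
   constant offsets 0 and 4, so the shared computation can be CSEd; folding
   the offsets into the invariant part first would instead create two
   distinct invariant expressions, p and p + 4. */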
7415 /* Performs a peephole optimization to reorder the iv update statement with
7416 a mem ref to enable instruction combining in later phases. The mem ref uses
7417 the iv value before the update, so the reordering transformation requires
7418 adjustment of the offset. CAND is the selected IV_CAND.
7420 Example:
7422 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7423 iv2 = iv1 + 1;
7425 if (t < val) (1)
7426 goto L;
7427 goto Head;
7430 Directly propagating t over to (1) will introduce an overlapping live range,
7431 thus increasing register pressure. This peephole transforms it into:
7434 iv2 = iv1 + 1;
7435 t = MEM_REF (base, iv2, 8, 8);
7436 if (t < val)
7437 goto L;
7438 goto Head;
7441 static void
7442 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7444 tree var_after;
7445 gimple *iv_update, *stmt;
7446 basic_block bb;
7447 gimple_stmt_iterator gsi, gsi_iv;
7449 if (cand->pos != IP_NORMAL)
7450 return;
7452 var_after = cand->var_after;
7453 iv_update = SSA_NAME_DEF_STMT (var_after);
7455 bb = gimple_bb (iv_update);
7456 gsi = gsi_last_nondebug_bb (bb);
7457 stmt = gsi_stmt (gsi);
7459 /* Only handle conditional statement for now. */
7460 if (gimple_code (stmt) != GIMPLE_COND)
7461 return;
7463 gsi_prev_nondebug (&gsi);
7464 stmt = gsi_stmt (gsi);
7465 if (stmt != iv_update)
7466 return;
7468 gsi_prev_nondebug (&gsi);
7469 if (gsi_end_p (gsi))
7470 return;
7472 stmt = gsi_stmt (gsi);
7473 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7474 return;
7476 if (stmt != use->stmt)
7477 return;
7479 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7480 return;
7482 if (dump_file && (dump_flags & TDF_DETAILS))
7484 fprintf (dump_file, "Reordering \n");
7485 print_gimple_stmt (dump_file, iv_update, 0);
7486 print_gimple_stmt (dump_file, use->stmt, 0);
7487 fprintf (dump_file, "\n");
7490 gsi = gsi_for_stmt (use->stmt);
7491 gsi_iv = gsi_for_stmt (iv_update);
7492 gsi_move_before (&gsi_iv, &gsi);
7494 cand->pos = IP_BEFORE_USE;
7495 cand->incremented_at = use->stmt;
7498 /* Return the alias pointer type that should be used for a MEM_REF
7499 associated with USE, which has type USE_PTR_ADDRESS. */
7501 static tree
7502 get_alias_ptr_type_for_ptr_address (iv_use *use)
7504 gcall *call = as_a <gcall *> (use->stmt);
7505 switch (gimple_call_internal_fn (call))
7507 case IFN_MASK_LOAD:
7508 case IFN_MASK_STORE:
7509 case IFN_MASK_LOAD_LANES:
7510 case IFN_MASK_STORE_LANES:
7511 case IFN_LEN_LOAD:
7512 case IFN_LEN_STORE:
7513 /* The second argument contains the correct alias type. */
7514 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7515 return TREE_TYPE (gimple_call_arg (call, 1));
7517 default:
7518 gcc_unreachable ();
7523 /* Rewrites USE (address that is an iv) using candidate CAND. */
7525 static void
7526 rewrite_use_address (struct ivopts_data *data,
7527 struct iv_use *use, struct iv_cand *cand)
7529 aff_tree aff;
7530 bool ok;
7532 adjust_iv_update_pos (cand, use);
7533 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7534 gcc_assert (ok);
7535 unshare_aff_combination (&aff);
7537 /* To avoid undefined overflow problems, all IV candidates use unsigned
7538 integer types. The drawback is that this makes it impossible for
7539 create_mem_ref to distinguish an IV that is based on a memory object
7540 from one that represents simply an offset.
7542 To work around this problem, we pass a hint to create_mem_ref that
7543 indicates which variable (if any) in aff is an IV based on a memory
7544 object. Note that we only consider the candidate. If this is not
7545 based on an object, the base of the reference is in some subexpression
7546 of the use -- but these will use pointer types, so they are recognized
7547 by the create_mem_ref heuristics anyway. */
7548 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7549 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7550 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7551 tree type = use->mem_type;
7552 tree alias_ptr_type;
7553 if (use->type == USE_PTR_ADDRESS)
7554 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7555 else
7557 gcc_assert (type == TREE_TYPE (*use->op_p));
7558 unsigned int align = get_object_alignment (*use->op_p);
7559 if (align != TYPE_ALIGN (type))
7560 type = build_aligned_type (type, align);
7561 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7563 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7564 iv, base_hint, data->speed);
7566 if (use->type == USE_PTR_ADDRESS)
7568 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7569 ref = fold_convert (get_use_type (use), ref);
7570 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7571 true, GSI_SAME_STMT);
7573 else
7574 copy_ref_info (ref, *use->op_p);
7576 *use->op_p = ref;
7579 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7580 candidate CAND. */
7582 static void
7583 rewrite_use_compare (struct ivopts_data *data,
7584 struct iv_use *use, struct iv_cand *cand)
7586 tree comp, op, bound;
7587 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7588 enum tree_code compare;
7589 struct iv_group *group = data->vgroups[use->group_id];
7590 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7592 bound = cp->value;
7593 if (bound)
7595 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7596 tree var_type = TREE_TYPE (var);
7597 gimple_seq stmts;
7599 if (dump_file && (dump_flags & TDF_DETAILS))
7601 fprintf (dump_file, "Replacing exit test: ");
7602 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7604 compare = cp->comp;
7605 bound = unshare_expr (fold_convert (var_type, bound));
7606 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7607 if (stmts)
7608 gsi_insert_seq_on_edge_immediate (
7609 loop_preheader_edge (data->current_loop),
7610 stmts);
7612 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7613 gimple_cond_set_lhs (cond_stmt, var);
7614 gimple_cond_set_code (cond_stmt, compare);
7615 gimple_cond_set_rhs (cond_stmt, op);
7616 return;
7619 /* The induction variable elimination failed; just express the original
7620 giv. */
7621 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7622 gcc_assert (comp != NULL_TREE);
7623 gcc_assert (use->op_p != NULL);
7624 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7625 SSA_NAME_VAR (*use->op_p),
7626 true, GSI_SAME_STMT);
7629 /* Rewrite the groups using the selected induction variables. */
7631 static void
7632 rewrite_groups (struct ivopts_data *data)
7634 unsigned i, j;
7636 for (i = 0; i < data->vgroups.length (); i++)
7638 struct iv_group *group = data->vgroups[i];
7639 struct iv_cand *cand = group->selected;
7641 gcc_assert (cand);
7643 if (group->type == USE_NONLINEAR_EXPR)
7645 for (j = 0; j < group->vuses.length (); j++)
7647 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7648 update_stmt (group->vuses[j]->stmt);
7651 else if (address_p (group->type))
7653 for (j = 0; j < group->vuses.length (); j++)
7655 rewrite_use_address (data, group->vuses[j], cand);
7656 update_stmt (group->vuses[j]->stmt);
7659 else
7661 gcc_assert (group->type == USE_COMPARE);
7663 for (j = 0; j < group->vuses.length (); j++)
7665 rewrite_use_compare (data, group->vuses[j], cand);
7666 update_stmt (group->vuses[j]->stmt);
7672 /* Removes the ivs that are not used after rewriting. */
7674 static void
7675 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7677 unsigned j;
7678 bitmap_iterator bi;
7680 /* Figure out an order in which to release SSA DEFs so that we don't
7681 release something that we'd have to propagate into a debug stmt
7682 afterwards. */
7683 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7685 struct version_info *info;
7687 info = ver_info (data, j);
7688 if (info->iv
7689 && !integer_zerop (info->iv->step)
7690 && !info->inv_id
7691 && !info->iv->nonlin_use
7692 && !info->preserve_biv)
7694 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7696 tree def = info->iv->ssa_name;
7698 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7700 imm_use_iterator imm_iter;
7701 use_operand_p use_p;
7702 gimple *stmt;
7703 int count = 0;
7705 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7707 if (!gimple_debug_bind_p (stmt))
7708 continue;
7710 /* We just want to determine whether to do nothing
7711 (count == 0), to substitute the computed
7712 expression into a single use of the SSA DEF by
7713 itself (count == 1), or to use a debug temp
7714 because the SSA DEF is used multiple times or as
7715 part of a larger expression (count > 1). */
7716 count++;
7717 if (gimple_debug_bind_get_value (stmt) != def)
7718 count++;
7720 if (count > 1)
7721 break;
7724 if (!count)
7725 continue;
7727 struct iv_use dummy_use;
7728 struct iv_cand *best_cand = NULL, *cand;
7729 unsigned i, best_pref = 0, cand_pref;
7730 tree comp = NULL_TREE;
7732 memset (&dummy_use, 0, sizeof (dummy_use));
7733 dummy_use.iv = info->iv;
7734 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7736 cand = data->vgroups[i]->selected;
7737 if (cand == best_cand)
7738 continue;
7739 cand_pref = operand_equal_p (cand->iv->step,
7740 info->iv->step, 0)
7741 ? 4 : 0;
7742 cand_pref
7743 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7744 == TYPE_MODE (TREE_TYPE (info->iv->base))
7745 ? 2 : 0;
7746 cand_pref
7747 += TREE_CODE (cand->iv->base) == INTEGER_CST
7748 ? 1 : 0;
7749 if (best_cand == NULL || best_pref < cand_pref)
7751 tree this_comp
7752 = get_debug_computation_at (data->current_loop,
7753 SSA_NAME_DEF_STMT (def),
7754 &dummy_use, cand);
7755 if (this_comp)
7757 best_cand = cand;
7758 best_pref = cand_pref;
7759 comp = this_comp;
7764 if (!best_cand)
7765 continue;
7767 comp = unshare_expr (comp);
7768 if (count > 1)
7770 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7771 /* FIXME: Is setting the mode really necessary? */
7772 if (SSA_NAME_VAR (def))
7773 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7774 else
7775 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7776 gdebug *def_temp
7777 = gimple_build_debug_bind (vexpr, comp, NULL);
7778 gimple_stmt_iterator gsi;
7780 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7781 gsi = gsi_after_labels (gimple_bb
7782 (SSA_NAME_DEF_STMT (def)));
7783 else
7784 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7786 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7787 comp = vexpr;
7790 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7792 if (!gimple_debug_bind_p (stmt))
7793 continue;
7795 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7796 SET_USE (use_p, comp);
7798 update_stmt (stmt);
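/* Note on the preference scoring above (illustrative): a selected candidate
   whose step matches the dead iv's step scores 4, a matching base mode adds
   2 and a constant base adds 1, so a candidate scoring 4 + 2 + 1 = 7 beats
   one that only matches the mode (2); on ties, the first candidate for which
   get_debug_computation_at succeeds is kept. */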
7805 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7806 for hash_map::traverse. */
7808 bool
7809 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7811 free (value);
7812 return true;
7815 /* Frees data allocated by the optimization of a single loop. */
7817 static void
7818 free_loop_data (struct ivopts_data *data)
7820 unsigned i, j;
7821 bitmap_iterator bi;
7822 tree obj;
7824 if (data->niters)
7826 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7827 delete data->niters;
7828 data->niters = NULL;
7831 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7833 struct version_info *info;
7835 info = ver_info (data, i);
7836 info->iv = NULL;
7837 info->has_nonlin_use = false;
7838 info->preserve_biv = false;
7839 info->inv_id = 0;
7841 bitmap_clear (data->relevant);
7842 bitmap_clear (data->important_candidates);
7844 for (i = 0; i < data->vgroups.length (); i++)
7846 struct iv_group *group = data->vgroups[i];
7848 for (j = 0; j < group->vuses.length (); j++)
7849 free (group->vuses[j]);
7850 group->vuses.release ();
7852 BITMAP_FREE (group->related_cands);
7853 for (j = 0; j < group->n_map_members; j++)
7855 if (group->cost_map[j].inv_vars)
7856 BITMAP_FREE (group->cost_map[j].inv_vars);
7857 if (group->cost_map[j].inv_exprs)
7858 BITMAP_FREE (group->cost_map[j].inv_exprs);
7861 free (group->cost_map);
7862 free (group);
7864 data->vgroups.truncate (0);
7866 for (i = 0; i < data->vcands.length (); i++)
7868 struct iv_cand *cand = data->vcands[i];
7870 if (cand->inv_vars)
7871 BITMAP_FREE (cand->inv_vars);
7872 if (cand->inv_exprs)
7873 BITMAP_FREE (cand->inv_exprs);
7874 free (cand);
7876 data->vcands.truncate (0);
7878 if (data->version_info_size < num_ssa_names)
7880 data->version_info_size = 2 * num_ssa_names;
7881 free (data->version_info);
7882 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7885 data->max_inv_var_id = 0;
7886 data->max_inv_expr_id = 0;
7888 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7889 SET_DECL_RTL (obj, NULL_RTX);
7891 decl_rtl_to_reset.truncate (0);
7893 data->inv_expr_tab->empty ();
7895 data->iv_common_cand_tab->empty ();
7896 data->iv_common_cands.truncate (0);
7899 /* Finalizes data structures used by the iv optimization pass. */
7902 static void
7903 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7905 free_loop_data (data);
7906 free (data->version_info);
7907 BITMAP_FREE (data->relevant);
7908 BITMAP_FREE (data->important_candidates);
7910 decl_rtl_to_reset.release ();
7911 data->vgroups.release ();
7912 data->vcands.release ();
7913 delete data->inv_expr_tab;
7914 data->inv_expr_tab = NULL;
7915 free_affine_expand_cache (&data->name_expansion_cache);
7916 if (data->base_object_map)
7917 delete data->base_object_map;
7918 delete data->iv_common_cand_tab;
7919 data->iv_common_cand_tab = NULL;
7920 data->iv_common_cands.release ();
7921 obstack_free (&data->iv_obstack, NULL);
7924 /* Returns true if the loop body BODY includes any function calls. */
7926 static bool
7927 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7929 gimple_stmt_iterator gsi;
7930 unsigned i;
7932 for (i = 0; i < num_nodes; i++)
7933 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7935 gimple *stmt = gsi_stmt (gsi);
7936 if (is_gimple_call (stmt)
7937 && !gimple_call_internal_p (stmt)
7938 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7939 return true;
7941 return false;
7944 /* Determine cost scaling factor for basic blocks in loop. */
7945 #define COST_SCALING_FACTOR_BOUND (20)
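/* Illustrative numbers only: with a header frequency of 100 and a hottest-block
   frequency of 5000, the ratio 50 exceeds COST_SCALING_FACTOR_BOUND, so a block
   of frequency BFREQ is given the factor 20 * BFREQ / 5000 rather than
   BFREQ / 100, which keeps every per-block factor at or below the bound.  */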
7947 static void
7948 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7950 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7951 if (!data->speed || lfreq <= 0)
7952 return;
7954 int max_freq = lfreq;
7955 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7957 body[i]->aux = (void *)(intptr_t) 1;
7958 if (max_freq < body[i]->count.to_frequency (cfun))
7959 max_freq = body[i]->count.to_frequency (cfun);
7961 if (max_freq > lfreq)
7963 int divisor, factor;
7964 /* Check whether the scaling factor itself needs to be clamped to the bound.  This
7965 avoids overflow when scaling costs according to the profile information.  */
7966 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7968 divisor = max_freq;
7969 factor = COST_SCALING_FACTOR_BOUND;
7971 else
7973 divisor = lfreq;
7974 factor = 1;
7976 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7978 int bfreq = body[i]->count.to_frequency (cfun);
7979 if (bfreq <= lfreq)
7980 continue;
7982 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7987 /* Find the doloop comparison IV use and set its doloop_p flag if found.  */
7989 static bool
7990 find_doloop_use (struct ivopts_data *data)
7992 struct loop *loop = data->current_loop;
7994 for (unsigned i = 0; i < data->vgroups.length (); i++)
7996 struct iv_group *group = data->vgroups[i];
7997 if (group->type == USE_COMPARE)
7999 gcc_assert (group->vuses.length () == 1);
8000 struct iv_use *use = group->vuses[0];
8001 gimple *stmt = use->stmt;
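/* Only a GIMPLE_COND that branches to an empty loop latch can serve as the doloop end condition.  */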
8002 if (gimple_code (stmt) == GIMPLE_COND)
8004 basic_block bb = gimple_bb (stmt);
8005 edge true_edge, false_edge;
8006 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8007 /* This comparison controls the branch to the loop latch.  Require the latch to be
8008 empty for now.  */
8009 if ((loop->latch == true_edge->dest
8010 || loop->latch == false_edge->dest)
8011 && empty_block_p (loop->latch))
8013 group->doloop_p = true;
8014 if (dump_file && (dump_flags & TDF_DETAILS))
8016 fprintf (dump_file, "Doloop cmp iv use: ");
8017 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8019 return true;
8025 return false;
8028 /* For targets that support doloop, predict whether the later RTL doloop
8029 transformation will be applied to this loop; if so, detect the doloop use
8030 and set the flag doloop_use_p.  */
8032 void
8033 analyze_and_mark_doloop_use (struct ivopts_data *data)
8035 data->doloop_use_p = false;
8037 if (!flag_branch_on_count_reg)
8038 return;
8040 if (data->current_loop->unroll == USHRT_MAX)
8041 return;
8043 if (!generic_predict_doloop_p (data))
8044 return;
8046 if (find_doloop_use (data))
8048 data->doloop_use_p = true;
8049 if (dump_file && (dump_flags & TDF_DETAILS))
8051 struct loop *loop = data->current_loop;
8052 fprintf (dump_file,
8053 "Predict loop %d can perform"
8054 " doloop optimization later.\n",
8055 loop->num);
8056 flow_loop_dump (loop, dump_file, NULL, 1);
8061 /* Optimizes the LOOP. Returns true if anything changed. */
8063 static bool
8064 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8065 bitmap toremove)
8067 bool changed = false;
8068 class iv_ca *iv_ca;
8069 edge exit = single_dom_exit (loop);
8070 basic_block *body;
8072 gcc_assert (!data->niters);
8073 data->current_loop = loop;
8074 data->loop_loc = find_loop_location (loop).get_location_t ();
8075 data->speed = optimize_loop_for_speed_p (loop);
8077 if (dump_file && (dump_flags & TDF_DETAILS))
8079 fprintf (dump_file, "Processing loop %d", loop->num);
8080 if (data->loop_loc != UNKNOWN_LOCATION)
8081 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8082 LOCATION_LINE (data->loop_loc));
8083 fprintf (dump_file, "\n");
8085 if (exit)
8087 fprintf (dump_file, " single exit %d -> %d, exit condition ",
8088 exit->src->index, exit->dest->index);
8089 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8090 fprintf (dump_file, "\n");
8093 fprintf (dump_file, "\n");
8096 body = get_loop_body (loop);
8097 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
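/* Renumber the statement uids in the loop body; the cost model compares uids to order statements within a block, e.g. to tell whether a use appears before or after a candidate's increment.  */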
8098 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8100 data->loop_single_exit_p
8101 = exit != NULL && loop_only_exit_p (loop, body, exit);
8103 /* For each SSA name, determine whether it behaves as an induction variable
8104 in this loop.  */
8105 if (!find_induction_variables (data, body))
8106 goto finish;
8108 /* Finds interesting uses (item 1). */
8109 find_interesting_uses (data, body);
8110 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8111 goto finish;
8113 /* Determine cost scaling factor for basic blocks in loop. */
8114 determine_scaling_factor (data, body);
8116 /* Analyze doloop possibility and mark the doloop use if predicted. */
8117 analyze_and_mark_doloop_use (data);
8119 /* Finds candidates for the induction variables (item 2). */
8120 find_iv_candidates (data);
8122 /* Calculates the costs (item 3, part 1). */
8123 determine_iv_costs (data);
8124 determine_group_iv_costs (data);
8125 determine_set_costs (data);
8127 /* Find the optimal set of induction variables (item 3, part 2). */
8128 iv_ca = find_optimal_iv_set (data);
8129 /* Cleanup basic block aux field. */
8130 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8131 body[i]->aux = NULL;
8132 if (!iv_ca)
8133 goto finish;
8134 changed = true;
8136 /* Create the new induction variables (item 4, part 1). */
8137 create_new_ivs (data, iv_ca);
8138 iv_ca_free (&iv_ca);
8140 /* Rewrite the uses (item 4, part 2). */
8141 rewrite_groups (data);
8143 /* Remove the ivs that are unused after rewriting. */
8144 remove_unused_ivs (data, toremove);
8146 finish:
8147 free (body);
8148 free_loop_data (data);
8150 return changed;
8153 /* Main entry point. Optimizes induction variables in loops. */
8155 void
8156 tree_ssa_iv_optimize (void)
8158 struct ivopts_data data;
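/* SSA names of IVs made unused by the rewriting; their definitions are removed in one pass after all loops have been processed.  */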
8159 auto_bitmap toremove;
8161 tree_ssa_iv_optimize_init (&data);
8163 /* Optimize the loops starting with the innermost ones. */
8164 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8166 if (!dbg_cnt (ivopts_loop))
8167 continue;
8169 if (dump_file && (dump_flags & TDF_DETAILS))
8170 flow_loop_dump (loop, dump_file, NULL, 1);
8172 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8175 /* Remove eliminated IV defs. */
8176 release_defs_bitset (toremove);
8178 /* We have changed the structure of induction variables; it might happen
8179 that definitions in the scev database refer to some of them that were
8180 eliminated. */
8181 scev_reset_htab ();
8182 /* Likewise niter and control-IV information. */
8183 free_numbers_of_iterations_estimates (cfun);
8185 tree_ssa_iv_optimize_finalize (&data);
8188 #include "gt-tree-ssa-loop-ivopts.h"