gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2017 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "memmodel.h"
  79 #include "tm_p.h"
  80 #include "ssa.h"
  81 #include "expmed.h"
  82 #include "insn-config.h"
  83 #include "emit-rtl.h"
  84 #include "recog.h"
  85 #include "cgraph.h"
  86 #include "gimple-pretty-print.h"
  87 #include "alias.h"
  88 #include "fold-const.h"
  89 #include "stor-layout.h"
  90 #include "tree-eh.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "tree-cfg.h"
  95 #include "tree-ssa-loop-ivopts.h"
  96 #include "tree-ssa-loop-manip.h"
  97 #include "tree-ssa-loop-niter.h"
  98 #include "tree-ssa-loop.h"
  99 #include "explow.h"
 100 #include "expr.h"
 101 #include "tree-dfa.h"
 102 #include "tree-ssa.h"
 103 #include "cfgloop.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "tree-affine.h"
 107 #include "tree-ssa-propagate.h"
 108 #include "tree-ssa-address.h"
 109 #include "builtins.h"
 110 #include "tree-vectorizer.h"
 111
 112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 113    cost of different addressing modes.  This should be moved to a TBD
 114    interface between the GIMPLE and RTL worlds.  */
 115
 116 /* The infinite cost.  */
 117 #define INFTY 10000000
 118
 119 /* Returns the expected number of loop iterations for LOOP.
 120    The average trip count is computed from profile data if it
 121    exists. */
 122
 123 static inline HOST_WIDE_INT
 124 avg_loop_niter (struct loop *loop)
 125 {
 126   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 127   if (niter == -1)
 128     {
 129       niter = likely_max_stmt_executions_int (loop);
 130
 131       if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
 132         return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   comp_cost (): cost (0), complexity (0), scratch (0)
 177   {}
 178
 179   comp_cost (int cost, unsigned complexity, int scratch = 0)
 180     : cost (cost), complexity (complexity), scratch (scratch)
 181   {}
 182
 183   /* Returns true if COST is infinite.  */
 184   bool infinite_cost_p ();
 185
 186   /* Adds costs COST1 and COST2.  */
 187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 188
 189   /* Adds COST to the comp_cost.  */
 190   comp_cost operator+= (comp_cost cost);
 191
 192   /* Adds constant C to this comp_cost.  */
 193   comp_cost operator+= (HOST_WIDE_INT c);
 194
 195   /* Subtracts constant C to this comp_cost.  */
 196   comp_cost operator-= (HOST_WIDE_INT c);
 197
 198   /* Divide the comp_cost by constant C.  */
 199   comp_cost operator/= (HOST_WIDE_INT c);
 200
 201   /* Multiply the comp_cost by constant C.  */
 202   comp_cost operator*= (HOST_WIDE_INT c);
 203
 204   /* Subtracts costs COST1 and COST2.  */
 205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 206
 207   /* Subtracts COST from this comp_cost.  */
 208   comp_cost operator-= (comp_cost cost);
 209
 210   /* Returns true if COST1 is smaller than COST2.  */
 211   friend bool operator< (comp_cost cost1, comp_cost cost2);
 212
 213   /* Returns true if COST1 and COST2 are equal.  */
 214   friend bool operator== (comp_cost cost1, comp_cost cost2);
 215
 216   /* Returns true if COST1 is smaller or equal than COST2.  */
 217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 218
 219   int cost;             /* The runtime cost.  */
 220   unsigned complexity;  /* The estimate of the complexity of the code for
 221                            the computation (in no concrete units --
 222                            complexity field should be larger for more
 223                            complex expressions and addressing modes).  */
 224   int scratch;          /* Scratch used during cost computation.  */
 225 };
 226
 227 static const comp_cost no_cost;
 228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
 342 struct iv_inv_expr_ent;
 343
 344 /* The candidate - cost pair.  */
 345 struct cost_pair
 346 {
 347   struct iv_cand *cand; /* The candidate.  */
 348   comp_cost cost;       /* The cost.  */
 349   enum tree_code comp;  /* For iv elimination, the comparison.  */
 350   bitmap inv_vars;      /* The list of invariant ssa_vars that have to be
 351                            preserved when representing iv_use with iv_cand.  */
 352   bitmap inv_exprs;     /* The list of newly created invariant expressions
 353                            when representing iv_use with iv_cand.  */
 354   tree value;           /* For final value elimination, the expression for
 355                            the final value of the iv.  For iv elimination,
 356                            the new bound to compare with.  */
 357 };
 358
 359 /* Use.  */
 360 struct iv_use
 361 {
 362   unsigned id;          /* The id of the use.  */
 363   unsigned group_id;    /* The group id the use belongs to.  */
 364   enum use_type type;   /* Type of the use.  */
 365   struct iv *iv;        /* The induction variable it is based on.  */
 366   gimple *stmt;         /* Statement in that it occurs.  */
 367   tree *op_p;           /* The place where it occurs.  */
 368
 369   tree addr_base;       /* Base address with const offset stripped.  */
 370   unsigned HOST_WIDE_INT addr_offset;
 371                         /* Const offset stripped from base address.  */
 372 };
 373
 374 /* Group of uses.  */
 375 struct iv_group
 376 {
 377   /* The id of the group.  */
 378   unsigned id;
 379   /* Uses of the group are of the same type.  */
 380   enum use_type type;
 381   /* The set of "related" IV candidates, plus the important ones.  */
 382   bitmap related_cands;
 383   /* Number of IV candidates in the cost_map.  */
 384   unsigned n_map_members;
 385   /* The costs wrto the iv candidates.  */
 386   struct cost_pair *cost_map;
 387   /* The selected candidate for the group.  */
 388   struct iv_cand *selected;
 389   /* Uses in the group.  */
 390   vec<struct iv_use *> vuses;
 391 };
 392
 393 /* The position where the iv is computed.  */
 394 enum iv_position
 395 {
 396   IP_NORMAL,            /* At the end, just before the exit condition.  */
 397   IP_END,               /* At the end of the latch block.  */
 398   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 399   IP_AFTER_USE,         /* Immediately after a specific use.  */
 400   IP_ORIGINAL           /* The original biv.  */
 401 };
 402
 403 /* The induction variable candidate.  */
 404 struct iv_cand
 405 {
 406   unsigned id;          /* The number of the candidate.  */
 407   bool important;       /* Whether this is an "important" candidate, i.e. such
 408                            that it should be considered by all uses.  */
 409   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 410   gimple *incremented_at;/* For original biv, the statement where it is
 411                            incremented.  */
 412   tree var_before;      /* The variable used for it before increment.  */
 413   tree var_after;       /* The variable used for it after increment.  */
 414   struct iv *iv;        /* The value of the candidate.  NULL for
 415                            "pseudocandidate" used to indicate the possibility
 416                            to replace the final value of an iv by direct
 417                            computation of the value.  */
 418   unsigned cost;        /* Cost of the candidate.  */
 419   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 420   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 421                               where it is incremented.  */
 422   bitmap inv_vars;      /* The list of invariant ssa_vars used in step of the
 423                            iv_cand.  */
 424   bitmap inv_exprs;     /* If step is more complicated than a single ssa_var,
 425                            hanlde it as a new invariant expression which will
 426                            be hoisted out of loop.  */
 427   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 428                            smaller type.  */
 429 };
 430
 431 /* Hashtable entry for common candidate derived from iv uses.  */
 432 struct iv_common_cand
 433 {
 434   tree base;
 435   tree step;
 436   /* IV uses from which this common candidate is derived.  */
 437   auto_vec<struct iv_use *> uses;
 438   hashval_t hash;
 439 };
 440
 441 /* Hashtable helpers.  */
 442
 443 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 444 {
 445   static inline hashval_t hash (const iv_common_cand *);
 446   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 447 };
 448
 449 /* Hash function for possible common candidates.  */
 450
 451 inline hashval_t
 452 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 453 {
 454   return ccand->hash;
 455 }
 456
 457 /* Hash table equality function for common candidates.  */
 458
 459 inline bool
 460 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 461                               const iv_common_cand *ccand2)
 462 {
 463   return (ccand1->hash == ccand2->hash
 464           && operand_equal_p (ccand1->base, ccand2->base, 0)
 465           && operand_equal_p (ccand1->step, ccand2->step, 0)
 466           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 467               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 468 }
 469
 470 /* Loop invariant expression hashtable entry.  */
 471
 472 struct iv_inv_expr_ent
 473 {
 474   /* Tree expression of the entry.  */
 475   tree expr;
 476   /* Unique indentifier.  */
 477   int id;
 478   /* Hash value.  */
 479   hashval_t hash;
 480 };
 481
 482 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 483
 484 static int
 485 sort_iv_inv_expr_ent (const void *a, const void *b)
 486 {
 487   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 488   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 489
 490   unsigned id1 = (*e1)->id;
 491   unsigned id2 = (*e2)->id;
 492
 493   if (id1 < id2)
 494     return -1;
 495   else if (id1 > id2)
 496     return 1;
 497   else
 498     return 0;
 499 }
 500
 501 /* Hashtable helpers.  */
 502
 503 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 504 {
 505   static inline hashval_t hash (const iv_inv_expr_ent *);
 506   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 507 };
 508
 509 /* Hash function for loop invariant expressions.  */
 510
 511 inline hashval_t
 512 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 513 {
 514   return expr->hash;
 515 }
 516
 517 /* Hash table equality function for expressions.  */
 518
 519 inline bool
 520 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 521                            const iv_inv_expr_ent *expr2)
 522 {
 523   return expr1->hash == expr2->hash
 524          && operand_equal_p (expr1->expr, expr2->expr, 0);
 525 }
 526
 527 struct ivopts_data
 528 {
 529   /* The currently optimized loop.  */
 530   struct loop *current_loop;
 531   source_location loop_loc;
 532
 533   /* Numbers of iterations for all exits of the current loop.  */
 534   hash_map<edge, tree_niter_desc *> *niters;
 535
 536   /* Number of registers used in it.  */
 537   unsigned regs_used;
 538
 539   /* The size of version_info array allocated.  */
 540   unsigned version_info_size;
 541
 542   /* The array of information for the ssa names.  */
 543   struct version_info *version_info;
 544
 545   /* The hashtable of loop invariant expressions created
 546      by ivopt.  */
 547   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 548
 549   /* The bitmap of indices in version_info whose value was changed.  */
 550   bitmap relevant;
 551
 552   /* The uses of induction variables.  */
 553   vec<iv_group *> vgroups;
 554
 555   /* The candidates.  */
 556   vec<iv_cand *> vcands;
 557
 558   /* A bitmap of important candidates.  */
 559   bitmap important_candidates;
 560
 561   /* Cache used by tree_to_aff_combination_expand.  */
 562   hash_map<tree, name_expansion *> *name_expansion_cache;
 563
 564   /* The hashtable of common candidates derived from iv uses.  */
 565   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 566
 567   /* The common candidates.  */
 568   vec<iv_common_cand *> iv_common_cands;
 569
 570   /* The maximum invariant variable id.  */
 571   unsigned max_inv_var_id;
 572
 573   /* The maximum invariant expression id.  */
 574   unsigned max_inv_expr_id;
 575
 576   /* Number of no_overflow BIVs which are not used in memory address.  */
 577   unsigned bivs_not_used_in_addr;
 578
 579   /* Obstack for iv structure.  */
 580   struct obstack iv_obstack;
 581
 582   /* Whether to consider just related and important candidates when replacing a
 583      use.  */
 584   bool consider_all_candidates;
 585
 586   /* Are we optimizing for speed?  */
 587   bool speed;
 588
 589   /* Whether the loop body includes any function calls.  */
 590   bool body_includes_call;
 591
 592   /* Whether the loop body can only be exited via single exit.  */
 593   bool loop_single_exit_p;
 594 };
 595
 596 /* An assignment of iv candidates to uses.  */
 597
 598 struct iv_ca
 599 {
 600   /* The number of uses covered by the assignment.  */
 601   unsigned upto;
 602
 603   /* Number of uses that cannot be expressed by the candidates in the set.  */
 604   unsigned bad_groups;
 605
 606   /* Candidate assigned to a use, together with the related costs.  */
 607   struct cost_pair **cand_for_group;
 608
 609   /* Number of times each candidate is used.  */
 610   unsigned *n_cand_uses;
 611
 612   /* The candidates used.  */
 613   bitmap cands;
 614
 615   /* The number of candidates in the set.  */
 616   unsigned n_cands;
 617
 618   /* The number of invariants needed, including both invariant variants and
 619      invariant expressions.  */
 620   unsigned n_invs;
 621
 622   /* Total cost of expressing uses.  */
 623   comp_cost cand_use_cost;
 624
 625   /* Total cost of candidates.  */
 626   unsigned cand_cost;
 627
 628   /* Number of times each invariant variable is used.  */
 629   unsigned *n_inv_var_uses;
 630
 631   /* Number of times each invariant expression is used.  */
 632   unsigned *n_inv_expr_uses;
 633
 634   /* Total cost of the assignment.  */
 635   comp_cost cost;
 636 };
 637
 638 /* Difference of two iv candidate assignments.  */
 639
 640 struct iv_ca_delta
 641 {
 642   /* Changed group.  */
 643   struct iv_group *group;
 644
 645   /* An old assignment (for rollback purposes).  */
 646   struct cost_pair *old_cp;
 647
 648   /* A new assignment.  */
 649   struct cost_pair *new_cp;
 650
 651   /* Next change in the list.  */
 652   struct iv_ca_delta *next;
 653 };
 654
 655 /* Bound on number of candidates below that all candidates are considered.  */
 656
 657 #define CONSIDER_ALL_CANDIDATES_BOUND \
 658   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 659
 660 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 661    optimizing such a loop would help, and it would take ages).  */
 662
 663 #define MAX_CONSIDERED_GROUPS \
 664   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 665
 666 /* If there are at most this number of ivs in the set, try removing unnecessary
 667    ivs from the set always.  */
 668
 669 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 670   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 671
 672 /* The list of trees for that the decl_rtl field must be reset is stored
 673    here.  */
 674
 675 static vec<tree> decl_rtl_to_reset;
 676
 677 static comp_cost force_expr_to_var_cost (tree, bool);
 678
 679 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 680
 681 edge
 682 single_dom_exit (struct loop *loop)
 683 {
 684   edge exit = single_exit (loop);
 685
 686   if (!exit)
 687     return NULL;
 688
 689   if (!just_once_each_iteration_p (loop, exit->src))
 690     return NULL;
 691
 692   return exit;
 693 }
 694
 695 /* Dumps information about the induction variable IV to FILE.  Don't dump
 696    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 697    preceding spaces indicated by INDENT_LEVEL.  */
 698
 699 void
 700 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 701 {
 702   const char *p;
 703   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 704
 705   if (indent_level > 4)
 706     indent_level = 4;
 707   p = spaces + 8 - (indent_level << 1);
 708
 709   fprintf (file, "%sIV struct:\n", p);
 710   if (iv->ssa_name && dump_name)
 711     {
 712       fprintf (file, "%s  SSA_NAME:\t", p);
 713       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 714       fprintf (file, "\n");
 715     }
 716
 717   fprintf (file, "%s  Type:\t", p);
 718   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 719   fprintf (file, "\n");
 720
 721   fprintf (file, "%s  Base:\t", p);
 722   print_generic_expr (file, iv->base, TDF_SLIM);
 723   fprintf (file, "\n");
 724
 725   fprintf (file, "%s  Step:\t", p);
 726   print_generic_expr (file, iv->step, TDF_SLIM);
 727   fprintf (file, "\n");
 728
 729   if (iv->base_object)
 730     {
 731       fprintf (file, "%s  Object:\t", p);
 732       print_generic_expr (file, iv->base_object, TDF_SLIM);
 733       fprintf (file, "\n");
 734     }
 735
 736   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 737
 738   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 739            p, iv->no_overflow ? "No-overflow" : "Overflow");
 740 }
 741
 742 /* Dumps information about the USE to FILE.  */
 743
 744 void
 745 dump_use (FILE *file, struct iv_use *use)
 746 {
 747   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 748   fprintf (file, "    At stmt:\t");
 749   print_gimple_stmt (file, use->stmt, 0);
 750   fprintf (file, "    At pos:\t");
 751   if (use->op_p)
 752     print_generic_expr (file, *use->op_p, TDF_SLIM);
 753   fprintf (file, "\n");
 754   dump_iv (file, use->iv, false, 2);
 755 }
 756
 757 /* Dumps information about the uses to FILE.  */
 758
 759 void
 760 dump_groups (FILE *file, struct ivopts_data *data)
 761 {
 762   unsigned i, j;
 763   struct iv_group *group;
 764
 765   for (i = 0; i < data->vgroups.length (); i++)
 766     {
 767       group = data->vgroups[i];
 768       fprintf (file, "Group %d:\n", group->id);
 769       if (group->type == USE_NONLINEAR_EXPR)
 770         fprintf (file, "  Type:\tGENERIC\n");
 771       else if (group->type == USE_ADDRESS)
 772         fprintf (file, "  Type:\tADDRESS\n");
 773       else
 774         {
 775           gcc_assert (group->type == USE_COMPARE);
 776           fprintf (file, "  Type:\tCOMPARE\n");
 777         }
 778       for (j = 0; j < group->vuses.length (); j++)
 779         dump_use (file, group->vuses[j]);
 780     }
 781 }
 782
 783 /* Dumps information about induction variable candidate CAND to FILE.  */
 784
 785 void
 786 dump_cand (FILE *file, struct iv_cand *cand)
 787 {
 788   struct iv *iv = cand->iv;
 789
 790   fprintf (file, "Candidate %d:\n", cand->id);
 791   if (cand->inv_vars)
 792     {
 793       fprintf (file, "  Depend on inv.vars: ");
 794       dump_bitmap (file, cand->inv_vars);
 795     }
 796   if (cand->inv_exprs)
 797     {
 798       fprintf (file, "  Depend on inv.exprs: ");
 799       dump_bitmap (file, cand->inv_exprs);
 800     }
 801
 802   if (cand->var_before)
 803     {
 804       fprintf (file, "  Var befor: ");
 805       print_generic_expr (file, cand->var_before, TDF_SLIM);
 806       fprintf (file, "\n");
 807     }
 808   if (cand->var_after)
 809     {
 810       fprintf (file, "  Var after: ");
 811       print_generic_expr (file, cand->var_after, TDF_SLIM);
 812       fprintf (file, "\n");
 813     }
 814
 815   switch (cand->pos)
 816     {
 817     case IP_NORMAL:
 818       fprintf (file, "  Incr POS: before exit test\n");
 819       break;
 820
 821     case IP_BEFORE_USE:
 822       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 823       break;
 824
 825     case IP_AFTER_USE:
 826       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 827       break;
 828
 829     case IP_END:
 830       fprintf (file, "  Incr POS: at end\n");
 831       break;
 832
 833     case IP_ORIGINAL:
 834       fprintf (file, "  Incr POS: orig biv\n");
 835       break;
 836     }
 837
 838   dump_iv (file, iv, false, 1);
 839 }
 840
 841 /* Returns the info for ssa version VER.  */
 842
 843 static inline struct version_info *
 844 ver_info (struct ivopts_data *data, unsigned ver)
 845 {
 846   return data->version_info + ver;
 847 }
 848
 849 /* Returns the info for ssa name NAME.  */
 850
 851 static inline struct version_info *
 852 name_info (struct ivopts_data *data, tree name)
 853 {
 854   return ver_info (data, SSA_NAME_VERSION (name));
 855 }
 856
 857 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 858    emitted in LOOP.  */
 859
 860 static bool
 861 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 862 {
 863   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 864
 865   gcc_assert (bb);
 866
 867   if (sbb == loop->latch)
 868     return true;
 869
 870   if (sbb != bb)
 871     return false;
 872
 873   return stmt == last_stmt (bb);
 874 }
 875
 876 /* Returns true if STMT if after the place where the original induction
 877    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 878    if the positions are identical.  */
 879
 880 static bool
 881 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 882 {
 883   basic_block cand_bb = gimple_bb (cand->incremented_at);
 884   basic_block stmt_bb = gimple_bb (stmt);
 885
 886   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 887     return false;
 888
 889   if (stmt_bb != cand_bb)
 890     return true;
 891
 892   if (true_if_equal
 893       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 894     return true;
 895   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 896 }
 897
 898 /* Returns true if STMT if after the place where the induction variable
 899    CAND is incremented in LOOP.  */
 900
 901 static bool
 902 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 903 {
 904   switch (cand->pos)
 905     {
 906     case IP_END:
 907       return false;
 908
 909     case IP_NORMAL:
 910       return stmt_after_ip_normal_pos (loop, stmt);
 911
 912     case IP_ORIGINAL:
 913     case IP_AFTER_USE:
 914       return stmt_after_inc_pos (cand, stmt, false);
 915
 916     case IP_BEFORE_USE:
 917       return stmt_after_inc_pos (cand, stmt, true);
 918
 919     default:
 920       gcc_unreachable ();
 921     }
 922 }
 923
 924 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 925
 926 static bool
 927 abnormal_ssa_name_p (tree exp)
 928 {
 929   if (!exp)
 930     return false;
 931
 932   if (TREE_CODE (exp) != SSA_NAME)
 933     return false;
 934
 935   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 936 }
 937
 938 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 939    abnormal phi node.  Callback for for_each_index.  */
 940
 941 static bool
 942 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 943                                   void *data ATTRIBUTE_UNUSED)
 944 {
 945   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 946     {
 947       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 948         return false;
 949       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 950         return false;
 951     }
 952
 953   return !abnormal_ssa_name_p (*index);
 954 }
 955
 956 /* Returns true if EXPR contains a ssa name that occurs in an
 957    abnormal phi node.  */
 958
 959 bool
 960 contains_abnormal_ssa_name_p (tree expr)
 961 {
 962   enum tree_code code;
 963   enum tree_code_class codeclass;
 964
 965   if (!expr)
 966     return false;
 967
 968   code = TREE_CODE (expr);
 969   codeclass = TREE_CODE_CLASS (code);
 970
 971   if (code == SSA_NAME)
 972     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 973
 974   if (code == INTEGER_CST
 975       || is_gimple_min_invariant (expr))
 976     return false;
 977
 978   if (code == ADDR_EXPR)
 979     return !for_each_index (&TREE_OPERAND (expr, 0),
 980                             idx_contains_abnormal_ssa_name_p,
 981                             NULL);
 982
 983   if (code == COND_EXPR)
 984     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 985       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 986       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 987
 988   switch (codeclass)
 989     {
 990     case tcc_binary:
 991     case tcc_comparison:
 992       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 993         return true;
 994
 995       /* Fallthru.  */
 996     case tcc_unary:
 997       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 998         return true;
 999
1000       break;
1001
1002     default:
1003       gcc_unreachable ();
1004     }
1005
1006   return false;
1007 }
1008
1009 /*  Returns the structure describing number of iterations determined from
1010     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1011
1012 static struct tree_niter_desc *
1013 niter_for_exit (struct ivopts_data *data, edge exit)
1014 {
1015   struct tree_niter_desc *desc;
1016   tree_niter_desc **slot;
1017
1018   if (!data->niters)
1019     {
1020       data->niters = new hash_map<edge, tree_niter_desc *>;
1021       slot = NULL;
1022     }
1023   else
1024     slot = data->niters->get (exit);
1025
1026   if (!slot)
1027     {
1028       /* Try to determine number of iterations.  We cannot safely work with ssa
1029          names that appear in phi nodes on abnormal edges, so that we do not
1030          create overlapping life ranges for them (PR 27283).  */
1031       desc = XNEW (struct tree_niter_desc);
1032       if (!number_of_iterations_exit (data->current_loop,
1033                                       exit, desc, true)
1034           || contains_abnormal_ssa_name_p (desc->niter))
1035         {
1036           XDELETE (desc);
1037           desc = NULL;
1038         }
1039       data->niters->put (exit, desc);
1040     }
1041   else
1042     desc = *slot;
1043
1044   return desc;
1045 }
1046
1047 /* Returns the structure describing number of iterations determined from
1048    single dominating exit of DATA->current_loop, or NULL if something
1049    goes wrong.  */
1050
1051 static struct tree_niter_desc *
1052 niter_for_single_dom_exit (struct ivopts_data *data)
1053 {
1054   edge exit = single_dom_exit (data->current_loop);
1055
1056   if (!exit)
1057     return NULL;
1058
1059   return niter_for_exit (data, exit);
1060 }
1061
1062 /* Initializes data structures used by the iv optimization pass, stored
1063    in DATA.  */
1064
1065 static void
1066 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1067 {
1068   data->version_info_size = 2 * num_ssa_names;
1069   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1070   data->relevant = BITMAP_ALLOC (NULL);
1071   data->important_candidates = BITMAP_ALLOC (NULL);
1072   data->max_inv_var_id = 0;
1073   data->max_inv_expr_id = 0;
1074   data->niters = NULL;
1075   data->vgroups.create (20);
1076   data->vcands.create (20);
1077   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1078   data->name_expansion_cache = NULL;
1079   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1080   data->iv_common_cands.create (20);
1081   decl_rtl_to_reset.create (20);
1082   gcc_obstack_init (&data->iv_obstack);
1083 }
1084
1085 /* Returns a memory object to that EXPR points.  In case we are able to
1086    determine that it does not point to any such object, NULL is returned.  */
1087
1088 static tree
1089 determine_base_object (tree expr)
1090 {
1091   enum tree_code code = TREE_CODE (expr);
1092   tree base, obj;
1093
1094   /* If this is a pointer casted to any type, we need to determine
1095      the base object for the pointer; so handle conversions before
1096      throwing away non-pointer expressions.  */
1097   if (CONVERT_EXPR_P (expr))
1098     return determine_base_object (TREE_OPERAND (expr, 0));
1099
1100   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1101     return NULL_TREE;
1102
1103   switch (code)
1104     {
1105     case INTEGER_CST:
1106       return NULL_TREE;
1107
1108     case ADDR_EXPR:
1109       obj = TREE_OPERAND (expr, 0);
1110       base = get_base_address (obj);
1111
1112       if (!base)
1113         return expr;
1114
1115       if (TREE_CODE (base) == MEM_REF)
1116         return determine_base_object (TREE_OPERAND (base, 0));
1117
1118       return fold_convert (ptr_type_node,
1119                            build_fold_addr_expr (base));
1120
1121     case POINTER_PLUS_EXPR:
1122       return determine_base_object (TREE_OPERAND (expr, 0));
1123
1124     case PLUS_EXPR:
1125     case MINUS_EXPR:
1126       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1127       gcc_unreachable ();
1128
1129     default:
1130       return fold_convert (ptr_type_node, expr);
1131     }
1132 }
1133
1134 /* Return true if address expression with non-DECL_P operand appears
1135    in EXPR.  */
1136
1137 static bool
1138 contain_complex_addr_expr (tree expr)
1139 {
1140   bool res = false;
1141
1142   STRIP_NOPS (expr);
1143   switch (TREE_CODE (expr))
1144     {
1145     case POINTER_PLUS_EXPR:
1146     case PLUS_EXPR:
1147     case MINUS_EXPR:
1148       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1149       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1150       break;
1151
1152     case ADDR_EXPR:
1153       return (!DECL_P (TREE_OPERAND (expr, 0)));
1154
1155     default:
1156       return false;
1157     }
1158
1159   return res;
1160 }
1161
1162 /* Allocates an induction variable with given initial value BASE and step STEP
1163    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1164
1165 static struct iv *
1166 alloc_iv (struct ivopts_data *data, tree base, tree step,
1167           bool no_overflow = false)
1168 {
1169   tree expr = base;
1170   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1171                                               sizeof (struct iv));
1172   gcc_assert (step != NULL_TREE);
1173
1174   /* Lower address expression in base except ones with DECL_P as operand.
1175      By doing this:
1176        1) More accurate cost can be computed for address expressions;
1177        2) Duplicate candidates won't be created for bases in different
1178           forms, like &a[0] and &a.  */
1179   STRIP_NOPS (expr);
1180   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1181       || contain_complex_addr_expr (expr))
1182     {
1183       aff_tree comb;
1184       tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1185       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1186     }
1187
1188   iv->base = base;
1189   iv->base_object = determine_base_object (base);
1190   iv->step = step;
1191   iv->biv_p = false;
1192   iv->nonlin_use = NULL;
1193   iv->ssa_name = NULL_TREE;
1194   if (!no_overflow
1195        && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1196                               base, step))
1197     no_overflow = true;
1198   iv->no_overflow = no_overflow;
1199   iv->have_address_use = false;
1200
1201   return iv;
1202 }
1203
1204 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1205    doesn't overflow.  */
1206
1207 static void
1208 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1209         bool no_overflow)
1210 {
1211   struct version_info *info = name_info (data, iv);
1212
1213   gcc_assert (!info->iv);
1214
1215   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1216   info->iv = alloc_iv (data, base, step, no_overflow);
1217   info->iv->ssa_name = iv;
1218 }
1219
1220 /* Finds induction variable declaration for VAR.  */
1221
1222 static struct iv *
1223 get_iv (struct ivopts_data *data, tree var)
1224 {
1225   basic_block bb;
1226   tree type = TREE_TYPE (var);
1227
1228   if (!POINTER_TYPE_P (type)
1229       && !INTEGRAL_TYPE_P (type))
1230     return NULL;
1231
1232   if (!name_info (data, var)->iv)
1233     {
1234       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1235
1236       if (!bb
1237           || !flow_bb_inside_loop_p (data->current_loop, bb))
1238         set_iv (data, var, var, build_int_cst (type, 0), true);
1239     }
1240
1241   return name_info (data, var)->iv;
1242 }
1243
1244 /* Return the first non-invariant ssa var found in EXPR.  */
1245
1246 static tree
1247 extract_single_var_from_expr (tree expr)
1248 {
1249   int i, n;
1250   tree tmp;
1251   enum tree_code code;
1252
1253   if (!expr || is_gimple_min_invariant (expr))
1254     return NULL;
1255
1256   code = TREE_CODE (expr);
1257   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1258     {
1259       n = TREE_OPERAND_LENGTH (expr);
1260       for (i = 0; i < n; i++)
1261         {
1262           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1263
1264           if (tmp)
1265             return tmp;
1266         }
1267     }
1268   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1269 }
1270
1271 /* Finds basic ivs.  */
1272
1273 static bool
1274 find_bivs (struct ivopts_data *data)
1275 {
1276   gphi *phi;
1277   affine_iv iv;
1278   tree step, type, base, stop;
1279   bool found = false;
1280   struct loop *loop = data->current_loop;
1281   gphi_iterator psi;
1282
1283   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1284     {
1285       phi = psi.phi ();
1286
1287       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1288         continue;
1289
1290       if (virtual_operand_p (PHI_RESULT (phi)))
1291         continue;
1292
1293       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1294         continue;
1295
1296       if (integer_zerop (iv.step))
1297         continue;
1298
1299       step = iv.step;
1300       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1301       /* Stop expanding iv base at the first ssa var referred by iv step.
1302          Ideally we should stop at any ssa var, because that's expensive
1303          and unusual to happen, we just do it on the first one.
1304
1305          See PR64705 for the rationale.  */
1306       stop = extract_single_var_from_expr (step);
1307       base = expand_simple_operations (base, stop);
1308       if (contains_abnormal_ssa_name_p (base)
1309           || contains_abnormal_ssa_name_p (step))
1310         continue;
1311
1312       type = TREE_TYPE (PHI_RESULT (phi));
1313       base = fold_convert (type, base);
1314       if (step)
1315         {
1316           if (POINTER_TYPE_P (type))
1317             step = convert_to_ptrofftype (step);
1318           else
1319             step = fold_convert (type, step);
1320         }
1321
1322       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1323       found = true;
1324     }
1325
1326   return found;
1327 }
1328
1329 /* Marks basic ivs.  */
1330
1331 static void
1332 mark_bivs (struct ivopts_data *data)
1333 {
1334   gphi *phi;
1335   gimple *def;
1336   tree var;
1337   struct iv *iv, *incr_iv;
1338   struct loop *loop = data->current_loop;
1339   basic_block incr_bb;
1340   gphi_iterator psi;
1341
1342   data->bivs_not_used_in_addr = 0;
1343   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1344     {
1345       phi = psi.phi ();
1346
1347       iv = get_iv (data, PHI_RESULT (phi));
1348       if (!iv)
1349         continue;
1350
1351       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1352       def = SSA_NAME_DEF_STMT (var);
1353       /* Don't mark iv peeled from other one as biv.  */
1354       if (def
1355           && gimple_code (def) == GIMPLE_PHI
1356           && gimple_bb (def) == loop->header)
1357         continue;
1358
1359       incr_iv = get_iv (data, var);
1360       if (!incr_iv)
1361         continue;
1362
1363       /* If the increment is in the subloop, ignore it.  */
1364       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1365       if (incr_bb->loop_father != data->current_loop
1366           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1367         continue;
1368
1369       iv->biv_p = true;
1370       incr_iv->biv_p = true;
1371       if (iv->no_overflow)
1372         data->bivs_not_used_in_addr++;
1373       if (incr_iv->no_overflow)
1374         data->bivs_not_used_in_addr++;
1375     }
1376 }
1377
1378 /* Checks whether STMT defines a linear induction variable and stores its
1379    parameters to IV.  */
1380
1381 static bool
1382 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1383 {
1384   tree lhs, stop;
1385   struct loop *loop = data->current_loop;
1386
1387   iv->base = NULL_TREE;
1388   iv->step = NULL_TREE;
1389
1390   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1391     return false;
1392
1393   lhs = gimple_assign_lhs (stmt);
1394   if (TREE_CODE (lhs) != SSA_NAME)
1395     return false;
1396
1397   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1398     return false;
1399
1400   /* Stop expanding iv base at the first ssa var referred by iv step.
1401      Ideally we should stop at any ssa var, because that's expensive
1402      and unusual to happen, we just do it on the first one.
1403
1404      See PR64705 for the rationale.  */
1405   stop = extract_single_var_from_expr (iv->step);
1406   iv->base = expand_simple_operations (iv->base, stop);
1407   if (contains_abnormal_ssa_name_p (iv->base)
1408       || contains_abnormal_ssa_name_p (iv->step))
1409     return false;
1410
1411   /* If STMT could throw, then do not consider STMT as defining a GIV.
1412      While this will suppress optimizations, we can not safely delete this
1413      GIV and associated statements, even if it appears it is not used.  */
1414   if (stmt_could_throw_p (stmt))
1415     return false;
1416
1417   return true;
1418 }
1419
1420 /* Finds general ivs in statement STMT.  */
1421
1422 static void
1423 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1424 {
1425   affine_iv iv;
1426
1427   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1428     return;
1429
1430   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1431 }
1432
1433 /* Finds general ivs in basic block BB.  */
1434
1435 static void
1436 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1437 {
1438   gimple_stmt_iterator bsi;
1439
1440   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1441     find_givs_in_stmt (data, gsi_stmt (bsi));
1442 }
1443
1444 /* Finds general ivs.  */
1445
1446 static void
1447 find_givs (struct ivopts_data *data)
1448 {
1449   struct loop *loop = data->current_loop;
1450   basic_block *body = get_loop_body_in_dom_order (loop);
1451   unsigned i;
1452
1453   for (i = 0; i < loop->num_nodes; i++)
1454     find_givs_in_bb (data, body[i]);
1455   free (body);
1456 }
1457
1458 /* For each ssa name defined in LOOP determines whether it is an induction
1459    variable and if so, its initial value and step.  */
1460
1461 static bool
1462 find_induction_variables (struct ivopts_data *data)
1463 {
1464   unsigned i;
1465   bitmap_iterator bi;
1466
1467   if (!find_bivs (data))
1468     return false;
1469
1470   find_givs (data);
1471   mark_bivs (data);
1472
1473   if (dump_file && (dump_flags & TDF_DETAILS))
1474     {
1475       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1476
1477       if (niter)
1478         {
1479           fprintf (dump_file, "  number of iterations ");
1480           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1481           if (!integer_zerop (niter->may_be_zero))
1482             {
1483               fprintf (dump_file, "; zero if ");
1484               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1485             }
1486           fprintf (dump_file, "\n");
1487         };
1488
1489       fprintf (dump_file, "\n<Induction Vars>:\n");
1490       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1491         {
1492           struct version_info *info = ver_info (data, i);
1493           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1494             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1495         }
1496     }
1497
1498   return true;
1499 }
1500
1501 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1502    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1503    is the const offset stripped from IV base; for other types use, both
1504    are zero by default.  */
1505
1506 static struct iv_use *
1507 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1508             gimple *stmt, enum use_type type, tree addr_base,
1509             unsigned HOST_WIDE_INT addr_offset)
1510 {
1511   struct iv_use *use = XCNEW (struct iv_use);
1512
1513   use->id = group->vuses.length ();
1514   use->group_id = group->id;
1515   use->type = type;
1516   use->iv = iv;
1517   use->stmt = stmt;
1518   use->op_p = use_p;
1519   use->addr_base = addr_base;
1520   use->addr_offset = addr_offset;
1521
1522   group->vuses.safe_push (use);
1523   return use;
1524 }
1525
1526 /* Checks whether OP is a loop-level invariant and if so, records it.
1527    NONLINEAR_USE is true if the invariant is used in a way we do not
1528    handle specially.  */
1529
1530 static void
1531 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1532 {
1533   basic_block bb;
1534   struct version_info *info;
1535
1536   if (TREE_CODE (op) != SSA_NAME
1537       || virtual_operand_p (op))
1538     return;
1539
1540   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1541   if (bb
1542       && flow_bb_inside_loop_p (data->current_loop, bb))
1543     return;
1544
1545   info = name_info (data, op);
1546   info->name = op;
1547   info->has_nonlin_use |= nonlinear_use;
1548   if (!info->inv_id)
1549     info->inv_id = ++data->max_inv_var_id;
1550   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1551 }
1552
1553 /* Record a group of TYPE.  */
1554
1555 static struct iv_group *
1556 record_group (struct ivopts_data *data, enum use_type type)
1557 {
1558   struct iv_group *group = XCNEW (struct iv_group);
1559
1560   group->id = data->vgroups.length ();
1561   group->type = type;
1562   group->related_cands = BITMAP_ALLOC (NULL);
1563   group->vuses.create (1);
1564
1565   data->vgroups.safe_push (group);
1566   return group;
1567 }
1568
1569 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1570    New group will be created if there is no existing group for the use.  */
1571
1572 static struct iv_use *
1573 record_group_use (struct ivopts_data *data, tree *use_p,
1574                   struct iv *iv, gimple *stmt, enum use_type type)
1575 {
1576   tree addr_base = NULL;
1577   struct iv_group *group = NULL;
1578   unsigned HOST_WIDE_INT addr_offset = 0;
1579
1580   /* Record non address type use in a new group.  */
1581   if (type == USE_ADDRESS && iv->base_object)
1582     {
1583       unsigned int i;
1584
1585       addr_base = strip_offset (iv->base, &addr_offset);
1586       for (i = 0; i < data->vgroups.length (); i++)
1587         {
1588           struct iv_use *use;
1589
1590           group = data->vgroups[i];
1591           use = group->vuses[0];
1592           if (use->type != USE_ADDRESS || !use->iv->base_object)
1593             continue;
1594
1595           /* Check if it has the same stripped base and step.  */
1596           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1597               && operand_equal_p (iv->step, use->iv->step, 0)
1598               && operand_equal_p (addr_base, use->addr_base, 0))
1599             break;
1600         }
1601       if (i == data->vgroups.length ())
1602         group = NULL;
1603     }
1604
1605   if (!group)
1606     group = record_group (data, type);
1607
1608   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1609 }
1610
1611 /* Checks whether the use OP is interesting and if so, records it.  */
1612
1613 static struct iv_use *
1614 find_interesting_uses_op (struct ivopts_data *data, tree op)
1615 {
1616   struct iv *iv;
1617   gimple *stmt;
1618   struct iv_use *use;
1619
1620   if (TREE_CODE (op) != SSA_NAME)
1621     return NULL;
1622
1623   iv = get_iv (data, op);
1624   if (!iv)
1625     return NULL;
1626
1627   if (iv->nonlin_use)
1628     {
1629       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1630       return iv->nonlin_use;
1631     }
1632
1633   if (integer_zerop (iv->step))
1634     {
1635       record_invariant (data, op, true);
1636       return NULL;
1637     }
1638
1639   stmt = SSA_NAME_DEF_STMT (op);
1640   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1641
1642   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1643   iv->nonlin_use = use;
1644   return use;
1645 }
1646
1647 /* Indicate how compare type iv_use can be handled.  */
1648 enum comp_iv_rewrite
1649 {
1650   COMP_IV_NA,
1651   /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
1652   COMP_IV_EXPR,
1653   /* We may rewrite compare type iv_uses on both sides of comparison by
1654      expressing value of each iv_use.  */
1655   COMP_IV_EXPR_2,
1656   /* We may rewrite compare type iv_use by expressing value of the iv_use
1657      or by eliminating it with other iv_cand.  */
1658   COMP_IV_ELIM
1659 };
1660
1661 /* Given a condition in statement STMT, checks whether it is a compare
1662    of an induction variable and an invariant.  If this is the case,
1663    CONTROL_VAR is set to location of the iv, BOUND to the location of
1664    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1665    induction variable descriptions, and true is returned.  If this is not
1666    the case, CONTROL_VAR and BOUND are set to the arguments of the
1667    condition and false is returned.  */
1668
1669 static enum comp_iv_rewrite
1670 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1671                        tree **control_var, tree **bound,
1672                        struct iv **iv_var, struct iv **iv_bound)
1673 {
1674   /* The objects returned when COND has constant operands.  */
1675   static struct iv const_iv;
1676   static tree zero;
1677   tree *op0 = &zero, *op1 = &zero;
1678   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1679   enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1680
1681   if (gimple_code (stmt) == GIMPLE_COND)
1682     {
1683       gcond *cond_stmt = as_a <gcond *> (stmt);
1684       op0 = gimple_cond_lhs_ptr (cond_stmt);
1685       op1 = gimple_cond_rhs_ptr (cond_stmt);
1686     }
1687   else
1688     {
1689       op0 = gimple_assign_rhs1_ptr (stmt);
1690       op1 = gimple_assign_rhs2_ptr (stmt);
1691     }
1692
1693   zero = integer_zero_node;
1694   const_iv.step = integer_zero_node;
1695
1696   if (TREE_CODE (*op0) == SSA_NAME)
1697     iv0 = get_iv (data, *op0);
1698   if (TREE_CODE (*op1) == SSA_NAME)
1699     iv1 = get_iv (data, *op1);
1700
1701   /* If both sides of comparison are IVs.  We can express ivs on both end.  */
1702   if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1703     {
1704       rewrite_type = COMP_IV_EXPR_2;
1705       goto end;
1706     }
1707
1708   /* If none side of comparison is IV.  */
1709   if ((!iv0 || integer_zerop (iv0->step))
1710       && (!iv1 || integer_zerop (iv1->step)))
1711     goto end;
1712
1713   /* Control variable may be on the other side.  */
1714   if (!iv0 || integer_zerop (iv0->step))
1715     {
1716       std::swap (op0, op1);
1717       std::swap (iv0, iv1);
1718     }
1719   /* If one side is IV and the other side isn't loop invariant.  */
1720   if (!iv1)
1721     rewrite_type = COMP_IV_EXPR;
1722   /* If one side is IV and the other side is loop invariant.  */
1723   else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1724     rewrite_type = COMP_IV_ELIM;
1725
1726 end:
1727   if (control_var)
1728     *control_var = op0;
1729   if (iv_var)
1730     *iv_var = iv0;
1731   if (bound)
1732     *bound = op1;
1733   if (iv_bound)
1734     *iv_bound = iv1;
1735
1736   return rewrite_type;
1737 }
1738
1739 /* Checks whether the condition in STMT is interesting and if so,
1740    records it.  */
1741
1742 static void
1743 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1744 {
1745   tree *var_p, *bound_p;
1746   struct iv *var_iv, *bound_iv;
1747   enum comp_iv_rewrite ret;
1748
1749   ret = extract_cond_operands (data, stmt,
1750                                &var_p, &bound_p, &var_iv, &bound_iv);
1751   if (ret == COMP_IV_NA)
1752     {
1753       find_interesting_uses_op (data, *var_p);
1754       find_interesting_uses_op (data, *bound_p);
1755       return;
1756     }
1757
1758   record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
1759   /* Record compare type iv_use for iv on the other side of comparison.  */
1760   if (ret == COMP_IV_EXPR_2)
1761     record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
1762 }
1763
1764 /* Returns the outermost loop EXPR is obviously invariant in
1765    relative to the loop LOOP, i.e. if all its operands are defined
1766    outside of the returned loop.  Returns NULL if EXPR is not
1767    even obviously invariant in LOOP.  */
1768
1769 struct loop *
1770 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1771 {
1772   basic_block def_bb;
1773   unsigned i, len;
1774
1775   if (is_gimple_min_invariant (expr))
1776     return current_loops->tree_root;
1777
1778   if (TREE_CODE (expr) == SSA_NAME)
1779     {
1780       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1781       if (def_bb)
1782         {
1783           if (flow_bb_inside_loop_p (loop, def_bb))
1784             return NULL;
1785           return superloop_at_depth (loop,
1786                                      loop_depth (def_bb->loop_father) + 1);
1787         }
1788
1789       return current_loops->tree_root;
1790     }
1791
1792   if (!EXPR_P (expr))
1793     return NULL;
1794
1795   unsigned maxdepth = 0;
1796   len = TREE_OPERAND_LENGTH (expr);
1797   for (i = 0; i < len; i++)
1798     {
1799       struct loop *ivloop;
1800       if (!TREE_OPERAND (expr, i))
1801         continue;
1802
1803       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1804       if (!ivloop)
1805         return NULL;
1806       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1807     }
1808
1809   return superloop_at_depth (loop, maxdepth);
1810 }
1811
1812 /* Returns true if expression EXPR is obviously invariant in LOOP,
1813    i.e. if all its operands are defined outside of the LOOP.  LOOP
1814    should not be the function body.  */
1815
1816 bool
1817 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1818 {
1819   basic_block def_bb;
1820   unsigned i, len;
1821
1822   gcc_assert (loop_depth (loop) > 0);
1823
1824   if (is_gimple_min_invariant (expr))
1825     return true;
1826
1827   if (TREE_CODE (expr) == SSA_NAME)
1828     {
1829       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1830       if (def_bb
1831           && flow_bb_inside_loop_p (loop, def_bb))
1832         return false;
1833
1834       return true;
1835     }
1836
1837   if (!EXPR_P (expr))
1838     return false;
1839
1840   len = TREE_OPERAND_LENGTH (expr);
1841   for (i = 0; i < len; i++)
1842     if (TREE_OPERAND (expr, i)
1843         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1844       return false;
1845
1846   return true;
1847 }
1848
1849 /* Given expression EXPR which computes inductive values with respect
1850    to loop recorded in DATA, this function returns biv from which EXPR
1851    is derived by tracing definition chains of ssa variables in EXPR.  */
1852
1853 static struct iv*
1854 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1855 {
1856   struct iv *iv;
1857   unsigned i, n;
1858   tree e2, e1;
1859   enum tree_code code;
1860   gimple *stmt;
1861
1862   if (expr == NULL_TREE)
1863     return NULL;
1864
1865   if (is_gimple_min_invariant (expr))
1866     return NULL;
1867
1868   code = TREE_CODE (expr);
1869   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1870     {
1871       n = TREE_OPERAND_LENGTH (expr);
1872       for (i = 0; i < n; i++)
1873         {
1874           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1875           if (iv)
1876             return iv;
1877         }
1878     }
1879
1880   /* Stop if it's not ssa name.  */
1881   if (code != SSA_NAME)
1882     return NULL;
1883
1884   iv = get_iv (data, expr);
1885   if (!iv || integer_zerop (iv->step))
1886     return NULL;
1887   else if (iv->biv_p)
1888     return iv;
1889
1890   stmt = SSA_NAME_DEF_STMT (expr);
1891   if (gphi *phi = dyn_cast <gphi *> (stmt))
1892     {
1893       ssa_op_iter iter;
1894       use_operand_p use_p;
1895       basic_block phi_bb = gimple_bb (phi);
1896
1897       /* Skip loop header PHI that doesn't define biv.  */
1898       if (phi_bb->loop_father == data->current_loop)
1899         return NULL;
1900
1901       if (virtual_operand_p (gimple_phi_result (phi)))
1902         return NULL;
1903
1904       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1905         {
1906           tree use = USE_FROM_PTR (use_p);
1907           iv = find_deriving_biv_for_expr (data, use);
1908           if (iv)
1909             return iv;
1910         }
1911       return NULL;
1912     }
1913   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1914     return NULL;
1915
1916   e1 = gimple_assign_rhs1 (stmt);
1917   code = gimple_assign_rhs_code (stmt);
1918   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1919     return find_deriving_biv_for_expr (data, e1);
1920
1921   switch (code)
1922     {
1923     case MULT_EXPR:
1924     case PLUS_EXPR:
1925     case MINUS_EXPR:
1926     case POINTER_PLUS_EXPR:
1927       /* Increments, decrements and multiplications by a constant
1928          are simple.  */
1929       e2 = gimple_assign_rhs2 (stmt);
1930       iv = find_deriving_biv_for_expr (data, e2);
1931       if (iv)
1932         return iv;
1933       gcc_fallthrough ();
1934
1935     CASE_CONVERT:
1936       /* Casts are simple.  */
1937       return find_deriving_biv_for_expr (data, e1);
1938
1939     default:
1940       break;
1941     }
1942
1943   return NULL;
1944 }
1945
1946 /* Record BIV, its predecessor and successor that they are used in
1947    address type uses.  */
1948
1949 static void
1950 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1951 {
1952   unsigned i;
1953   tree type, base_1, base_2;
1954   bitmap_iterator bi;
1955
1956   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1957       || biv->have_address_use || !biv->no_overflow)
1958     return;
1959
1960   type = TREE_TYPE (biv->base);
1961   if (!INTEGRAL_TYPE_P (type))
1962     return;
1963
1964   biv->have_address_use = true;
1965   data->bivs_not_used_in_addr--;
1966   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1967   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1968     {
1969       struct iv *iv = ver_info (data, i)->iv;
1970
1971       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1972           || iv->have_address_use || !iv->no_overflow)
1973         continue;
1974
1975       if (type != TREE_TYPE (iv->base)
1976           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1977         continue;
1978
1979       if (!operand_equal_p (biv->step, iv->step, 0))
1980         continue;
1981
1982       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1983       if (operand_equal_p (base_1, iv->base, 0)
1984           || operand_equal_p (base_2, biv->base, 0))
1985         {
1986           iv->have_address_use = true;
1987           data->bivs_not_used_in_addr--;
1988         }
1989     }
1990 }
1991
1992 /* Cumulates the steps of indices into DATA and replaces their values with the
1993    initial ones.  Returns false when the value of the index cannot be determined.
1994    Callback for for_each_index.  */
1995
1996 struct ifs_ivopts_data
1997 {
1998   struct ivopts_data *ivopts_data;
1999   gimple *stmt;
2000   tree step;
2001 };
2002
2003 static bool
2004 idx_find_step (tree base, tree *idx, void *data)
2005 {
2006   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2007   struct iv *iv;
2008   bool use_overflow_semantics = false;
2009   tree step, iv_base, iv_step, lbound, off;
2010   struct loop *loop = dta->ivopts_data->current_loop;
2011
2012   /* If base is a component ref, require that the offset of the reference
2013      be invariant.  */
2014   if (TREE_CODE (base) == COMPONENT_REF)
2015     {
2016       off = component_ref_field_offset (base);
2017       return expr_invariant_in_loop_p (loop, off);
2018     }
2019
2020   /* If base is array, first check whether we will be able to move the
2021      reference out of the loop (in order to take its address in strength
2022      reduction).  In order for this to work we need both lower bound
2023      and step to be loop invariants.  */
2024   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2025     {
2026       /* Moreover, for a range, the size needs to be invariant as well.  */
2027       if (TREE_CODE (base) == ARRAY_RANGE_REF
2028           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2029         return false;
2030
2031       step = array_ref_element_size (base);
2032       lbound = array_ref_low_bound (base);
2033
2034       if (!expr_invariant_in_loop_p (loop, step)
2035           || !expr_invariant_in_loop_p (loop, lbound))
2036         return false;
2037     }
2038
2039   if (TREE_CODE (*idx) != SSA_NAME)
2040     return true;
2041
2042   iv = get_iv (dta->ivopts_data, *idx);
2043   if (!iv)
2044     return false;
2045
2046   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
2047           *&x[0], which is not folded and does not trigger the
2048           ARRAY_REF path below.  */
2049   *idx = iv->base;
2050
2051   if (integer_zerop (iv->step))
2052     return true;
2053
2054   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2055     {
2056       step = array_ref_element_size (base);
2057
2058       /* We only handle addresses whose step is an integer constant.  */
2059       if (TREE_CODE (step) != INTEGER_CST)
2060         return false;
2061     }
2062   else
2063     /* The step for pointer arithmetics already is 1 byte.  */
2064     step = size_one_node;
2065
2066   iv_base = iv->base;
2067   iv_step = iv->step;
2068   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2069     use_overflow_semantics = true;
2070
2071   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2072                             sizetype, &iv_base, &iv_step, dta->stmt,
2073                             use_overflow_semantics))
2074     {
2075       /* The index might wrap.  */
2076       return false;
2077     }
2078
2079   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2080   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2081
2082   if (dta->ivopts_data->bivs_not_used_in_addr)
2083     {
2084       if (!iv->biv_p)
2085         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2086
2087       record_biv_for_address_use (dta->ivopts_data, iv);
2088     }
2089   return true;
2090 }
2091
2092 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2093    object is passed to it in DATA.  */
2094
2095 static bool
2096 idx_record_use (tree base, tree *idx,
2097                 void *vdata)
2098 {
2099   struct ivopts_data *data = (struct ivopts_data *) vdata;
2100   find_interesting_uses_op (data, *idx);
2101   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2102     {
2103       find_interesting_uses_op (data, array_ref_element_size (base));
2104       find_interesting_uses_op (data, array_ref_low_bound (base));
2105     }
2106   return true;
2107 }
2108
2109 /* If we can prove that TOP = cst * BOT for some constant cst,
2110    store cst to MUL and return true.  Otherwise return false.
2111    The returned value is always sign-extended, regardless of the
2112    signedness of TOP and BOT.  */
2113
2114 static bool
2115 constant_multiple_of (tree top, tree bot, widest_int *mul)
2116 {
2117   tree mby;
2118   enum tree_code code;
2119   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2120   widest_int res, p0, p1;
2121
2122   STRIP_NOPS (top);
2123   STRIP_NOPS (bot);
2124
2125   if (operand_equal_p (top, bot, 0))
2126     {
2127       *mul = 1;
2128       return true;
2129     }
2130
2131   code = TREE_CODE (top);
2132   switch (code)
2133     {
2134     case MULT_EXPR:
2135       mby = TREE_OPERAND (top, 1);
2136       if (TREE_CODE (mby) != INTEGER_CST)
2137         return false;
2138
2139       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2140         return false;
2141
2142       *mul = wi::sext (res * wi::to_widest (mby), precision);
2143       return true;
2144
2145     case PLUS_EXPR:
2146     case MINUS_EXPR:
2147       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2148           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2149         return false;
2150
2151       if (code == MINUS_EXPR)
2152         p1 = -p1;
2153       *mul = wi::sext (p0 + p1, precision);
2154       return true;
2155
2156     case INTEGER_CST:
2157       if (TREE_CODE (bot) != INTEGER_CST)
2158         return false;
2159
2160       p0 = widest_int::from (wi::to_wide (top), SIGNED);
2161       p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2162       if (p1 == 0)
2163         return false;
2164       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2165       return res == 0;
2166
2167     default:
2168       return false;
2169     }
2170 }
2171
2172 /* Return true if memory reference REF with step STEP may be unaligned.  */
2173
2174 static bool
2175 may_be_unaligned_p (tree ref, tree step)
2176 {
2177   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2178      thus they are not misaligned.  */
2179   if (TREE_CODE (ref) == TARGET_MEM_REF)
2180     return false;
2181
2182   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2183   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2184     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2185
2186   unsigned HOST_WIDE_INT bitpos;
2187   unsigned int ref_align;
2188   get_object_alignment_1 (ref, &ref_align, &bitpos);
2189   if (ref_align < align
2190       || (bitpos % align) != 0
2191       || (bitpos % BITS_PER_UNIT) != 0)
2192     return true;
2193
2194   unsigned int trailing_zeros = tree_ctz (step);
2195   if (trailing_zeros < HOST_BITS_PER_INT
2196       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2197     return true;
2198
2199   return false;
2200 }
2201
2202 /* Return true if EXPR may be non-addressable.   */
2203
2204 bool
2205 may_be_nonaddressable_p (tree expr)
2206 {
2207   switch (TREE_CODE (expr))
2208     {
2209     case TARGET_MEM_REF:
2210       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2211          target, thus they are always addressable.  */
2212       return false;
2213
2214     case MEM_REF:
2215       /* Likewise for MEM_REFs, modulo the storage order.  */
2216       return REF_REVERSE_STORAGE_ORDER (expr);
2217
2218     case BIT_FIELD_REF:
2219       if (REF_REVERSE_STORAGE_ORDER (expr))
2220         return true;
2221       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2222
2223     case COMPONENT_REF:
2224       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2225         return true;
2226       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2227              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2228
2229     case ARRAY_REF:
2230     case ARRAY_RANGE_REF:
2231       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2232         return true;
2233       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2234
2235     case VIEW_CONVERT_EXPR:
2236       /* This kind of view-conversions may wrap non-addressable objects
2237          and make them look addressable.  After some processing the
2238          non-addressability may be uncovered again, causing ADDR_EXPRs
2239          of inappropriate objects to be built.  */
2240       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2241           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2242         return true;
2243       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2244
2245     CASE_CONVERT:
2246       return true;
2247
2248     default:
2249       break;
2250     }
2251
2252   return false;
2253 }
2254
2255 /* Finds addresses in *OP_P inside STMT.  */
2256
2257 static void
2258 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2259                                tree *op_p)
2260 {
2261   tree base = *op_p, step = size_zero_node;
2262   struct iv *civ;
2263   struct ifs_ivopts_data ifs_ivopts_data;
2264
2265   /* Do not play with volatile memory references.  A bit too conservative,
2266      perhaps, but safe.  */
2267   if (gimple_has_volatile_ops (stmt))
2268     goto fail;
2269
2270   /* Ignore bitfields for now.  Not really something terribly complicated
2271      to handle.  TODO.  */
2272   if (TREE_CODE (base) == BIT_FIELD_REF)
2273     goto fail;
2274
2275   base = unshare_expr (base);
2276
2277   if (TREE_CODE (base) == TARGET_MEM_REF)
2278     {
2279       tree type = build_pointer_type (TREE_TYPE (base));
2280       tree astep;
2281
2282       if (TMR_BASE (base)
2283           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2284         {
2285           civ = get_iv (data, TMR_BASE (base));
2286           if (!civ)
2287             goto fail;
2288
2289           TMR_BASE (base) = civ->base;
2290           step = civ->step;
2291         }
2292       if (TMR_INDEX2 (base)
2293           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2294         {
2295           civ = get_iv (data, TMR_INDEX2 (base));
2296           if (!civ)
2297             goto fail;
2298
2299           TMR_INDEX2 (base) = civ->base;
2300           step = civ->step;
2301         }
2302       if (TMR_INDEX (base)
2303           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2304         {
2305           civ = get_iv (data, TMR_INDEX (base));
2306           if (!civ)
2307             goto fail;
2308
2309           TMR_INDEX (base) = civ->base;
2310           astep = civ->step;
2311
2312           if (astep)
2313             {
2314               if (TMR_STEP (base))
2315                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2316
2317               step = fold_build2 (PLUS_EXPR, type, step, astep);
2318             }
2319         }
2320
2321       if (integer_zerop (step))
2322         goto fail;
2323       base = tree_mem_ref_addr (type, base);
2324     }
2325   else
2326     {
2327       ifs_ivopts_data.ivopts_data = data;
2328       ifs_ivopts_data.stmt = stmt;
2329       ifs_ivopts_data.step = size_zero_node;
2330       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2331           || integer_zerop (ifs_ivopts_data.step))
2332         goto fail;
2333       step = ifs_ivopts_data.step;
2334
2335       /* Check that the base expression is addressable.  This needs
2336          to be done after substituting bases of IVs into it.  */
2337       if (may_be_nonaddressable_p (base))
2338         goto fail;
2339
2340       /* Moreover, on strict alignment platforms, check that it is
2341          sufficiently aligned.  */
2342       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2343         goto fail;
2344
2345       base = build_fold_addr_expr (base);
2346
2347       /* Substituting bases of IVs into the base expression might
2348          have caused folding opportunities.  */
2349       if (TREE_CODE (base) == ADDR_EXPR)
2350         {
2351           tree *ref = &TREE_OPERAND (base, 0);
2352           while (handled_component_p (*ref))
2353             ref = &TREE_OPERAND (*ref, 0);
2354           if (TREE_CODE (*ref) == MEM_REF)
2355             {
2356               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2357                                       TREE_OPERAND (*ref, 0),
2358                                       TREE_OPERAND (*ref, 1));
2359               if (tem)
2360                 *ref = tem;
2361             }
2362         }
2363     }
2364
2365   civ = alloc_iv (data, base, step);
2366   /* Fail if base object of this memory reference is unknown.  */
2367   if (civ->base_object == NULL_TREE)
2368     goto fail;
2369
2370   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2371   return;
2372
2373 fail:
2374   for_each_index (op_p, idx_record_use, data);
2375 }
2376
2377 /* Finds and records invariants used in STMT.  */
2378
2379 static void
2380 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2381 {
2382   ssa_op_iter iter;
2383   use_operand_p use_p;
2384   tree op;
2385
2386   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2387     {
2388       op = USE_FROM_PTR (use_p);
2389       record_invariant (data, op, false);
2390     }
2391 }
2392
2393 /* Finds interesting uses of induction variables in the statement STMT.  */
2394
2395 static void
2396 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2397 {
2398   struct iv *iv;
2399   tree op, *lhs, *rhs;
2400   ssa_op_iter iter;
2401   use_operand_p use_p;
2402   enum tree_code code;
2403
2404   find_invariants_stmt (data, stmt);
2405
2406   if (gimple_code (stmt) == GIMPLE_COND)
2407     {
2408       find_interesting_uses_cond (data, stmt);
2409       return;
2410     }
2411
2412   if (is_gimple_assign (stmt))
2413     {
2414       lhs = gimple_assign_lhs_ptr (stmt);
2415       rhs = gimple_assign_rhs1_ptr (stmt);
2416
2417       if (TREE_CODE (*lhs) == SSA_NAME)
2418         {
2419           /* If the statement defines an induction variable, the uses are not
2420              interesting by themselves.  */
2421
2422           iv = get_iv (data, *lhs);
2423
2424           if (iv && !integer_zerop (iv->step))
2425             return;
2426         }
2427
2428       code = gimple_assign_rhs_code (stmt);
2429       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2430           && (REFERENCE_CLASS_P (*rhs)
2431               || is_gimple_val (*rhs)))
2432         {
2433           if (REFERENCE_CLASS_P (*rhs))
2434             find_interesting_uses_address (data, stmt, rhs);
2435           else
2436             find_interesting_uses_op (data, *rhs);
2437
2438           if (REFERENCE_CLASS_P (*lhs))
2439             find_interesting_uses_address (data, stmt, lhs);
2440           return;
2441         }
2442       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2443         {
2444           find_interesting_uses_cond (data, stmt);
2445           return;
2446         }
2447
2448       /* TODO -- we should also handle address uses of type
2449
2450          memory = call (whatever);
2451
2452          and
2453
2454          call (memory).  */
2455     }
2456
2457   if (gimple_code (stmt) == GIMPLE_PHI
2458       && gimple_bb (stmt) == data->current_loop->header)
2459     {
2460       iv = get_iv (data, PHI_RESULT (stmt));
2461
2462       if (iv && !integer_zerop (iv->step))
2463         return;
2464     }
2465
2466   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2467     {
2468       op = USE_FROM_PTR (use_p);
2469
2470       if (TREE_CODE (op) != SSA_NAME)
2471         continue;
2472
2473       iv = get_iv (data, op);
2474       if (!iv)
2475         continue;
2476
2477       find_interesting_uses_op (data, op);
2478     }
2479 }
2480
2481 /* Finds interesting uses of induction variables outside of loops
2482    on loop exit edge EXIT.  */
2483
2484 static void
2485 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2486 {
2487   gphi *phi;
2488   gphi_iterator psi;
2489   tree def;
2490
2491   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2492     {
2493       phi = psi.phi ();
2494       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2495       if (!virtual_operand_p (def))
2496         find_interesting_uses_op (data, def);
2497     }
2498 }
2499
2500 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2501    mode for memory reference represented by USE.  */
2502
2503 static GTY (()) vec<rtx, va_gc> *addr_list;
2504
2505 static bool
2506 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2507 {
2508   rtx reg, addr;
2509   unsigned list_index;
2510   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2511   machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2512
2513   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2514   if (list_index >= vec_safe_length (addr_list))
2515     vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2516
2517   addr = (*addr_list)[list_index];
2518   if (!addr)
2519     {
2520       addr_mode = targetm.addr_space.address_mode (as);
2521       reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2522       addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2523       (*addr_list)[list_index] = addr;
2524     }
2525   else
2526     addr_mode = GET_MODE (addr);
2527
2528   XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2529   return (memory_address_addr_space_p (mem_mode, addr, as));
2530 }
2531
2532 /* Comparison function to sort group in ascending order of addr_offset.  */
2533
2534 static int
2535 group_compare_offset (const void *a, const void *b)
2536 {
2537   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2538   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2539
2540   if ((*u1)->addr_offset != (*u2)->addr_offset)
2541     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2542   else
2543     return 0;
2544 }
2545
2546 /* Check if small groups should be split.  Return true if no group
2547    contains more than two uses with distinct addr_offsets.  Return
2548    false otherwise.  We want to split such groups because:
2549
2550      1) Small groups don't have much benefit and may interfer with
2551         general candidate selection.
2552      2) Size for problem with only small groups is usually small and
2553         general algorithm can handle it well.
2554
2555    TODO -- Above claim may not hold when we want to merge memory
2556    accesses with conseuctive addresses.  */
2557
2558 static bool
2559 split_small_address_groups_p (struct ivopts_data *data)
2560 {
2561   unsigned int i, j, distinct = 1;
2562   struct iv_use *pre;
2563   struct iv_group *group;
2564
2565   for (i = 0; i < data->vgroups.length (); i++)
2566     {
2567       group = data->vgroups[i];
2568       if (group->vuses.length () == 1)
2569         continue;
2570
2571       gcc_assert (group->type == USE_ADDRESS);
2572       if (group->vuses.length () == 2)
2573         {
2574           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2575             std::swap (group->vuses[0], group->vuses[1]);
2576         }
2577       else
2578         group->vuses.qsort (group_compare_offset);
2579
2580       if (distinct > 2)
2581         continue;
2582
2583       distinct = 1;
2584       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2585         {
2586           if (group->vuses[j]->addr_offset != pre->addr_offset)
2587             {
2588               pre = group->vuses[j];
2589               distinct++;
2590             }
2591
2592           if (distinct > 2)
2593             break;
2594         }
2595     }
2596
2597   return (distinct <= 2);
2598 }
2599
2600 /* For each group of address type uses, this function further groups
2601    these uses according to the maximum offset supported by target's
2602    [base + offset] addressing mode.  */
2603
2604 static void
2605 split_address_groups (struct ivopts_data *data)
2606 {
2607   unsigned int i, j;
2608   /* Always split group.  */
2609   bool split_p = split_small_address_groups_p (data);
2610
2611   for (i = 0; i < data->vgroups.length (); i++)
2612     {
2613       struct iv_group *new_group = NULL;
2614       struct iv_group *group = data->vgroups[i];
2615       struct iv_use *use = group->vuses[0];
2616
2617       use->id = 0;
2618       use->group_id = group->id;
2619       if (group->vuses.length () == 1)
2620         continue;
2621
2622       gcc_assert (group->type == USE_ADDRESS);
2623
2624       for (j = 1; j < group->vuses.length ();)
2625         {
2626           struct iv_use *next = group->vuses[j];
2627           HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2628
2629           /* Split group if aksed to, or the offset against the first
2630              use can't fit in offset part of addressing mode.  IV uses
2631              having the same offset are still kept in one group.  */
2632           if (offset != 0 &&
2633               (split_p || !addr_offset_valid_p (use, offset)))
2634             {
2635               if (!new_group)
2636                 new_group = record_group (data, group->type);
2637               group->vuses.ordered_remove (j);
2638               new_group->vuses.safe_push (next);
2639               continue;
2640             }
2641
2642           next->id = j;
2643           next->group_id = group->id;
2644           j++;
2645         }
2646     }
2647 }
2648
2649 /* Finds uses of the induction variables that are interesting.  */
2650
2651 static void
2652 find_interesting_uses (struct ivopts_data *data)
2653 {
2654   basic_block bb;
2655   gimple_stmt_iterator bsi;
2656   basic_block *body = get_loop_body (data->current_loop);
2657   unsigned i;
2658   edge e;
2659
2660   for (i = 0; i < data->current_loop->num_nodes; i++)
2661     {
2662       edge_iterator ei;
2663       bb = body[i];
2664
2665       FOR_EACH_EDGE (e, ei, bb->succs)
2666         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2667             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2668           find_interesting_uses_outside (data, e);
2669
2670       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2671         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2672       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2673         if (!is_gimple_debug (gsi_stmt (bsi)))
2674           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2675     }
2676   free (body);
2677
2678   split_address_groups (data);
2679
2680   if (dump_file && (dump_flags & TDF_DETAILS))
2681     {
2682       fprintf (dump_file, "\n<IV Groups>:\n");
2683       dump_groups (dump_file, data);
2684       fprintf (dump_file, "\n");
2685     }
2686 }
2687
2688 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2689    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2690    we are at the top-level of the processed address.  */
2691
2692 static tree
2693 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2694                 HOST_WIDE_INT *offset)
2695 {
2696   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2697   enum tree_code code;
2698   tree type, orig_type = TREE_TYPE (expr);
2699   HOST_WIDE_INT off0, off1, st;
2700   tree orig_expr = expr;
2701
2702   STRIP_NOPS (expr);
2703
2704   type = TREE_TYPE (expr);
2705   code = TREE_CODE (expr);
2706   *offset = 0;
2707
2708   switch (code)
2709     {
2710     case INTEGER_CST:
2711       if (!cst_and_fits_in_hwi (expr)
2712           || integer_zerop (expr))
2713         return orig_expr;
2714
2715       *offset = int_cst_value (expr);
2716       return build_int_cst (orig_type, 0);
2717
2718     case POINTER_PLUS_EXPR:
2719     case PLUS_EXPR:
2720     case MINUS_EXPR:
2721       op0 = TREE_OPERAND (expr, 0);
2722       op1 = TREE_OPERAND (expr, 1);
2723
2724       op0 = strip_offset_1 (op0, false, false, &off0);
2725       op1 = strip_offset_1 (op1, false, false, &off1);
2726
2727       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2728       if (op0 == TREE_OPERAND (expr, 0)
2729           && op1 == TREE_OPERAND (expr, 1))
2730         return orig_expr;
2731
2732       if (integer_zerop (op1))
2733         expr = op0;
2734       else if (integer_zerop (op0))
2735         {
2736           if (code == MINUS_EXPR)
2737             expr = fold_build1 (NEGATE_EXPR, type, op1);
2738           else
2739             expr = op1;
2740         }
2741       else
2742         expr = fold_build2 (code, type, op0, op1);
2743
2744       return fold_convert (orig_type, expr);
2745
2746     case MULT_EXPR:
2747       op1 = TREE_OPERAND (expr, 1);
2748       if (!cst_and_fits_in_hwi (op1))
2749         return orig_expr;
2750
2751       op0 = TREE_OPERAND (expr, 0);
2752       op0 = strip_offset_1 (op0, false, false, &off0);
2753       if (op0 == TREE_OPERAND (expr, 0))
2754         return orig_expr;
2755
2756       *offset = off0 * int_cst_value (op1);
2757       if (integer_zerop (op0))
2758         expr = op0;
2759       else
2760         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2761
2762       return fold_convert (orig_type, expr);
2763
2764     case ARRAY_REF:
2765     case ARRAY_RANGE_REF:
2766       if (!inside_addr)
2767         return orig_expr;
2768
2769       step = array_ref_element_size (expr);
2770       if (!cst_and_fits_in_hwi (step))
2771         break;
2772
2773       st = int_cst_value (step);
2774       op1 = TREE_OPERAND (expr, 1);
2775       op1 = strip_offset_1 (op1, false, false, &off1);
2776       *offset = off1 * st;
2777
2778       if (top_compref
2779           && integer_zerop (op1))
2780         {
2781           /* Strip the component reference completely.  */
2782           op0 = TREE_OPERAND (expr, 0);
2783           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2784           *offset += off0;
2785           return op0;
2786         }
2787       break;
2788
2789     case COMPONENT_REF:
2790       {
2791         tree field;
2792
2793         if (!inside_addr)
2794           return orig_expr;
2795
2796         tmp = component_ref_field_offset (expr);
2797         field = TREE_OPERAND (expr, 1);
2798         if (top_compref
2799             && cst_and_fits_in_hwi (tmp)
2800             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2801           {
2802             HOST_WIDE_INT boffset, abs_off;
2803
2804             /* Strip the component reference completely.  */
2805             op0 = TREE_OPERAND (expr, 0);
2806             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2807             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2808             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2809             if (boffset < 0)
2810               abs_off = -abs_off;
2811
2812             *offset = off0 + int_cst_value (tmp) + abs_off;
2813             return op0;
2814           }
2815       }
2816       break;
2817
2818     case ADDR_EXPR:
2819       op0 = TREE_OPERAND (expr, 0);
2820       op0 = strip_offset_1 (op0, true, true, &off0);
2821       *offset += off0;
2822
2823       if (op0 == TREE_OPERAND (expr, 0))
2824         return orig_expr;
2825
2826       expr = build_fold_addr_expr (op0);
2827       return fold_convert (orig_type, expr);
2828
2829     case MEM_REF:
2830       /* ???  Offset operand?  */
2831       inside_addr = false;
2832       break;
2833
2834     default:
2835       return orig_expr;
2836     }
2837
2838   /* Default handling of expressions for that we want to recurse into
2839      the first operand.  */
2840   op0 = TREE_OPERAND (expr, 0);
2841   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2842   *offset += off0;
2843
2844   if (op0 == TREE_OPERAND (expr, 0)
2845       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2846     return orig_expr;
2847
2848   expr = copy_node (expr);
2849   TREE_OPERAND (expr, 0) = op0;
2850   if (op1)
2851     TREE_OPERAND (expr, 1) = op1;
2852
2853   /* Inside address, we might strip the top level component references,
2854      thus changing type of the expression.  Handling of ADDR_EXPR
2855      will fix that.  */
2856   expr = fold_convert (orig_type, expr);
2857
2858   return expr;
2859 }
2860
2861 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2862
2863 tree
2864 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2865 {
2866   HOST_WIDE_INT off;
2867   tree core = strip_offset_1 (expr, false, false, &off);
2868   *offset = off;
2869   return core;
2870 }
2871
2872 /* Returns variant of TYPE that can be used as base for different uses.
2873    We return unsigned type with the same precision, which avoids problems
2874    with overflows.  */
2875
2876 static tree
2877 generic_type_for (tree type)
2878 {
2879   if (POINTER_TYPE_P (type))
2880     return unsigned_type_for (type);
2881
2882   if (TYPE_UNSIGNED (type))
2883     return type;
2884
2885   return unsigned_type_for (type);
2886 }
2887
2888 /* Private data for walk_tree.  */
2889
2890 struct walk_tree_data
2891 {
2892   bitmap *inv_vars;
2893   struct ivopts_data *idata;
2894 };
2895
2896 /* Callback function for walk_tree, it records invariants and symbol
2897    reference in *EXPR_P.  DATA is the structure storing result info.  */
2898
2899 static tree
2900 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2901 {
2902   tree op = *expr_p;
2903   struct version_info *info;
2904   struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2905
2906   if (TREE_CODE (op) != SSA_NAME)
2907     return NULL_TREE;
2908
2909   info = name_info (wdata->idata, op);
2910   /* Because we expand simple operations when finding IVs, loop invariant
2911      variable that isn't referred by the original loop could be used now.
2912      Record such invariant variables here.  */
2913   if (!info->iv)
2914     {
2915       struct ivopts_data *idata = wdata->idata;
2916       basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2917
2918       if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2919         {
2920           set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2921           record_invariant (idata, op, false);
2922         }
2923     }
2924   if (!info->inv_id || info->has_nonlin_use)
2925     return NULL_TREE;
2926
2927   if (!*wdata->inv_vars)
2928     *wdata->inv_vars = BITMAP_ALLOC (NULL);
2929   bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2930
2931   return NULL_TREE;
2932 }
2933
2934 /* Records invariants in *EXPR_P.  INV_VARS is the bitmap to that we should
2935    store it.  */
2936
2937 static inline void
2938 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2939 {
2940   struct walk_tree_data wdata;
2941
2942   if (!inv_vars)
2943     return;
2944
2945   wdata.idata = data;
2946   wdata.inv_vars = inv_vars;
2947   walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2948 }
2949
2950 /* Get entry from invariant expr hash table for INV_EXPR.  New entry
2951    will be recorded if it doesn't exist yet.  Given below two exprs:
2952      inv_expr + cst1, inv_expr + cst2
2953    It's hard to make decision whether constant part should be stripped
2954    or not.  We choose to not strip based on below facts:
2955      1) We need to count ADD cost for constant part if it's stripped,
2956         which is't always trivial where this functions is called.
2957      2) Stripping constant away may be conflict with following loop
2958         invariant hoisting pass.
2959      3) Not stripping constant away results in more invariant exprs,
2960         which usually leads to decision preferring lower reg pressure.  */
2961
2962 static iv_inv_expr_ent *
2963 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2964 {
2965   STRIP_NOPS (inv_expr);
2966
2967   if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2968     return NULL;
2969
2970   /* Don't strip constant part away as we used to.  */
2971
2972   /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
2973   struct iv_inv_expr_ent ent;
2974   ent.expr = inv_expr;
2975   ent.hash = iterative_hash_expr (inv_expr, 0);
2976   struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2977
2978   if (!*slot)
2979     {
2980       *slot = XNEW (struct iv_inv_expr_ent);
2981       (*slot)->expr = inv_expr;
2982       (*slot)->hash = ent.hash;
2983       (*slot)->id = ++data->max_inv_expr_id;
2984     }
2985
2986   return *slot;
2987 }
2988
2989 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2990    position to POS.  If USE is not NULL, the candidate is set as related to
2991    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2992    replacement of the final value of the iv by a direct computation.  */
2993
2994 static struct iv_cand *
2995 add_candidate_1 (struct ivopts_data *data,
2996                  tree base, tree step, bool important, enum iv_position pos,
2997                  struct iv_use *use, gimple *incremented_at,
2998                  struct iv *orig_iv = NULL)
2999 {
3000   unsigned i;
3001   struct iv_cand *cand = NULL;
3002   tree type, orig_type;
3003
3004   gcc_assert (base && step);
3005
3006   /* -fkeep-gc-roots-live means that we have to keep a real pointer
3007      live, but the ivopts code may replace a real pointer with one
3008      pointing before or after the memory block that is then adjusted
3009      into the memory block during the loop.  FIXME: It would likely be
3010      better to actually force the pointer live and still use ivopts;
3011      for example, it would be enough to write the pointer into memory
3012      and keep it there until after the loop.  */
3013   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3014     return NULL;
3015
3016   /* For non-original variables, make sure their values are computed in a type
3017      that does not invoke undefined behavior on overflows (since in general,
3018      we cannot prove that these induction variables are non-wrapping).  */
3019   if (pos != IP_ORIGINAL)
3020     {
3021       orig_type = TREE_TYPE (base);
3022       type = generic_type_for (orig_type);
3023       if (type != orig_type)
3024         {
3025           base = fold_convert (type, base);
3026           step = fold_convert (type, step);
3027         }
3028     }
3029
3030   for (i = 0; i < data->vcands.length (); i++)
3031     {
3032       cand = data->vcands[i];
3033
3034       if (cand->pos != pos)
3035         continue;
3036
3037       if (cand->incremented_at != incremented_at
3038           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3039               && cand->ainc_use != use))
3040         continue;
3041
3042       if (operand_equal_p (base, cand->iv->base, 0)
3043           && operand_equal_p (step, cand->iv->step, 0)
3044           && (TYPE_PRECISION (TREE_TYPE (base))
3045               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3046         break;
3047     }
3048
3049   if (i == data->vcands.length ())
3050     {
3051       cand = XCNEW (struct iv_cand);
3052       cand->id = i;
3053       cand->iv = alloc_iv (data, base, step);
3054       cand->pos = pos;
3055       if (pos != IP_ORIGINAL)
3056         {
3057           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3058           cand->var_after = cand->var_before;
3059         }
3060       cand->important = important;
3061       cand->incremented_at = incremented_at;
3062       data->vcands.safe_push (cand);
3063
3064       if (TREE_CODE (step) != INTEGER_CST)
3065         {
3066           find_inv_vars (data, &step, &cand->inv_vars);
3067
3068           iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3069           /* Share bitmap between inv_vars and inv_exprs for cand.  */
3070           if (inv_expr != NULL)
3071             {
3072               cand->inv_exprs = cand->inv_vars;
3073               cand->inv_vars = NULL;
3074               if (cand->inv_exprs)
3075                 bitmap_clear (cand->inv_exprs);
3076               else
3077                 cand->inv_exprs = BITMAP_ALLOC (NULL);
3078
3079               bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3080             }
3081         }
3082
3083       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3084         cand->ainc_use = use;
3085       else
3086         cand->ainc_use = NULL;
3087
3088       cand->orig_iv = orig_iv;
3089       if (dump_file && (dump_flags & TDF_DETAILS))
3090         dump_cand (dump_file, cand);
3091     }
3092
3093   cand->important |= important;
3094
3095   /* Relate candidate to the group for which it is added.  */
3096   if (use)
3097     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3098
3099   return cand;
3100 }
3101
3102 /* Returns true if incrementing the induction variable at the end of the LOOP
3103    is allowed.
3104
3105    The purpose is to avoid splitting latch edge with a biv increment, thus
3106    creating a jump, possibly confusing other optimization passes and leaving
3107    less freedom to scheduler.  So we allow IP_END only if IP_NORMAL is not
3108    available (so we do not have a better alternative), or if the latch edge
3109    is already nonempty.  */
3110
3111 static bool
3112 allow_ip_end_pos_p (struct loop *loop)
3113 {
3114   if (!ip_normal_pos (loop))
3115     return true;
3116
3117   if (!empty_block_p (ip_end_pos (loop)))
3118     return true;
3119
3120   return false;
3121 }
3122
3123 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3124    Important field is set to IMPORTANT.  */
3125
3126 static void
3127 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3128                         bool important, struct iv_use *use)
3129 {
3130   basic_block use_bb = gimple_bb (use->stmt);
3131   machine_mode mem_mode;
3132   unsigned HOST_WIDE_INT cstepi;
3133
3134   /* If we insert the increment in any position other than the standard
3135      ones, we must ensure that it is incremented once per iteration.
3136      It must not be in an inner nested loop, or one side of an if
3137      statement.  */
3138   if (use_bb->loop_father != data->current_loop
3139       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3140       || stmt_can_throw_internal (use->stmt)
3141       || !cst_and_fits_in_hwi (step))
3142     return;
3143
3144   cstepi = int_cst_value (step);
3145
3146   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3147   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3148         || USE_STORE_PRE_INCREMENT (mem_mode))
3149        && GET_MODE_SIZE (mem_mode) == cstepi)
3150       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3151            || USE_STORE_PRE_DECREMENT (mem_mode))
3152           && GET_MODE_SIZE (mem_mode) == -cstepi))
3153     {
3154       enum tree_code code = MINUS_EXPR;
3155       tree new_base;
3156       tree new_step = step;
3157
3158       if (POINTER_TYPE_P (TREE_TYPE (base)))
3159         {
3160           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3161           code = POINTER_PLUS_EXPR;
3162         }
3163       else
3164         new_step = fold_convert (TREE_TYPE (base), new_step);
3165       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3166       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3167                        use->stmt);
3168     }
3169   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3170         || USE_STORE_POST_INCREMENT (mem_mode))
3171        && GET_MODE_SIZE (mem_mode) == cstepi)
3172       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3173            || USE_STORE_POST_DECREMENT (mem_mode))
3174           && GET_MODE_SIZE (mem_mode) == -cstepi))
3175     {
3176       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3177                        use->stmt);
3178     }
3179 }
3180
3181 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3182    position to POS.  If USE is not NULL, the candidate is set as related to
3183    it.  The candidate computation is scheduled before exit condition and at
3184    the end of loop.  */
3185
3186 static void
3187 add_candidate (struct ivopts_data *data,
3188                tree base, tree step, bool important, struct iv_use *use,
3189                struct iv *orig_iv = NULL)
3190 {
3191   if (ip_normal_pos (data->current_loop))
3192     add_candidate_1 (data, base, step, important,
3193                      IP_NORMAL, use, NULL, orig_iv);
3194   if (ip_end_pos (data->current_loop)
3195       && allow_ip_end_pos_p (data->current_loop))
3196     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3197 }
3198
3199 /* Adds standard iv candidates.  */
3200
3201 static void
3202 add_standard_iv_candidates (struct ivopts_data *data)
3203 {
3204   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3205
3206   /* The same for a double-integer type if it is still fast enough.  */
3207   if (TYPE_PRECISION
3208         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3209       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3210     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3211                    build_int_cst (long_integer_type_node, 1), true, NULL);
3212
3213   /* The same for a double-integer type if it is still fast enough.  */
3214   if (TYPE_PRECISION
3215         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3216       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3217     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3218                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3219 }
3220
3221
3222 /* Adds candidates bases on the old induction variable IV.  */
3223
3224 static void
3225 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3226 {
3227   gimple *phi;
3228   tree def;
3229   struct iv_cand *cand;
3230
3231   /* Check if this biv is used in address type use.  */
3232   if (iv->no_overflow  && iv->have_address_use
3233       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3234       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3235     {
3236       tree base = fold_convert (sizetype, iv->base);
3237       tree step = fold_convert (sizetype, iv->step);
3238
3239       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3240       add_candidate (data, base, step, true, NULL, iv);
3241       /* Add iv cand of the original type only if it has nonlinear use.  */
3242       if (iv->nonlin_use)
3243         add_candidate (data, iv->base, iv->step, true, NULL);
3244     }
3245   else
3246     add_candidate (data, iv->base, iv->step, true, NULL);
3247
3248   /* The same, but with initial value zero.  */
3249   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3250     add_candidate (data, size_int (0), iv->step, true, NULL);
3251   else
3252     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3253                    iv->step, true, NULL);
3254
3255   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3256   if (gimple_code (phi) == GIMPLE_PHI)
3257     {
3258       /* Additionally record the possibility of leaving the original iv
3259          untouched.  */
3260       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3261       /* Don't add candidate if it's from another PHI node because
3262          it's an affine iv appearing in the form of PEELED_CHREC.  */
3263       phi = SSA_NAME_DEF_STMT (def);
3264       if (gimple_code (phi) != GIMPLE_PHI)
3265         {
3266           cand = add_candidate_1 (data,
3267                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3268                                   SSA_NAME_DEF_STMT (def));
3269           if (cand)
3270             {
3271               cand->var_before = iv->ssa_name;
3272               cand->var_after = def;
3273             }
3274         }
3275       else
3276         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3277     }
3278 }
3279
3280 /* Adds candidates based on the old induction variables.  */
3281
3282 static void
3283 add_iv_candidate_for_bivs (struct ivopts_data *data)
3284 {
3285   unsigned i;
3286   struct iv *iv;
3287   bitmap_iterator bi;
3288
3289   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3290     {
3291       iv = ver_info (data, i)->iv;
3292       if (iv && iv->biv_p && !integer_zerop (iv->step))
3293         add_iv_candidate_for_biv (data, iv);
3294     }
3295 }
3296
3297 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3298
3299 static void
3300 record_common_cand (struct ivopts_data *data, tree base,
3301                     tree step, struct iv_use *use)
3302 {
3303   struct iv_common_cand ent;
3304   struct iv_common_cand **slot;
3305
3306   ent.base = base;
3307   ent.step = step;
3308   ent.hash = iterative_hash_expr (base, 0);
3309   ent.hash = iterative_hash_expr (step, ent.hash);
3310
3311   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3312   if (*slot == NULL)
3313     {
3314       *slot = new iv_common_cand ();
3315       (*slot)->base = base;
3316       (*slot)->step = step;
3317       (*slot)->uses.create (8);
3318       (*slot)->hash = ent.hash;
3319       data->iv_common_cands.safe_push ((*slot));
3320     }
3321
3322   gcc_assert (use != NULL);
3323   (*slot)->uses.safe_push (use);
3324   return;
3325 }
3326
3327 /* Comparison function used to sort common candidates.  */
3328
3329 static int
3330 common_cand_cmp (const void *p1, const void *p2)
3331 {
3332   unsigned n1, n2;
3333   const struct iv_common_cand *const *const ccand1
3334     = (const struct iv_common_cand *const *)p1;
3335   const struct iv_common_cand *const *const ccand2
3336     = (const struct iv_common_cand *const *)p2;
3337
3338   n1 = (*ccand1)->uses.length ();
3339   n2 = (*ccand2)->uses.length ();
3340   return n2 - n1;
3341 }
3342
3343 /* Adds IV candidates based on common candidated recorded.  */
3344
3345 static void
3346 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3347 {
3348   unsigned i, j;
3349   struct iv_cand *cand_1, *cand_2;
3350
3351   data->iv_common_cands.qsort (common_cand_cmp);
3352   for (i = 0; i < data->iv_common_cands.length (); i++)
3353     {
3354       struct iv_common_cand *ptr = data->iv_common_cands[i];
3355
3356       /* Only add IV candidate if it's derived from multiple uses.  */
3357       if (ptr->uses.length () <= 1)
3358         break;
3359
3360       cand_1 = NULL;
3361       cand_2 = NULL;
3362       if (ip_normal_pos (data->current_loop))
3363         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3364                                   false, IP_NORMAL, NULL, NULL);
3365
3366       if (ip_end_pos (data->current_loop)
3367           && allow_ip_end_pos_p (data->current_loop))
3368         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3369                                   false, IP_END, NULL, NULL);
3370
3371       /* Bind deriving uses and the new candidates.  */
3372       for (j = 0; j < ptr->uses.length (); j++)
3373         {
3374           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3375           if (cand_1)
3376             bitmap_set_bit (group->related_cands, cand_1->id);
3377           if (cand_2)
3378             bitmap_set_bit (group->related_cands, cand_2->id);
3379         }
3380     }
3381
3382   /* Release data since it is useless from this point.  */
3383   data->iv_common_cand_tab->empty ();
3384   data->iv_common_cands.truncate (0);
3385 }
3386
3387 /* Adds candidates based on the value of USE's iv.  */
3388
3389 static void
3390 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3391 {
3392   unsigned HOST_WIDE_INT offset;
3393   tree base;
3394   tree basetype;
3395   struct iv *iv = use->iv;
3396
3397   add_candidate (data, iv->base, iv->step, false, use);
3398
3399   /* Record common candidate for use in case it can be shared by others.  */
3400   record_common_cand (data, iv->base, iv->step, use);
3401
3402   /* Record common candidate with initial value zero.  */
3403   basetype = TREE_TYPE (iv->base);
3404   if (POINTER_TYPE_P (basetype))
3405     basetype = sizetype;
3406   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3407
3408   /* Record common candidate with constant offset stripped in base.
3409      Like the use itself, we also add candidate directly for it.  */
3410   base = strip_offset (iv->base, &offset);
3411   if (offset || base != iv->base)
3412     {
3413       record_common_cand (data, base, iv->step, use);
3414       add_candidate (data, base, iv->step, false, use);
3415     }
3416
3417   /* Record common candidate with base_object removed in base.  */
3418   base = iv->base;
3419   STRIP_NOPS (base);
3420   if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3421     {
3422       tree step = iv->step;
3423
3424       STRIP_NOPS (step);
3425       base = TREE_OPERAND (base, 1);
3426       step = fold_convert (sizetype, step);
3427       record_common_cand (data, base, step, use);
3428       /* Also record common candidate with offset stripped.  */
3429       base = strip_offset (base, &offset);
3430       if (offset)
3431         record_common_cand (data, base, step, use);
3432     }
3433
3434   /* At last, add auto-incremental candidates.  Make such variables
3435      important since other iv uses with same base object may be based
3436      on it.  */
3437   if (use != NULL && use->type == USE_ADDRESS)
3438     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3439 }
3440
3441 /* Adds candidates based on the uses.  */
3442
3443 static void
3444 add_iv_candidate_for_groups (struct ivopts_data *data)
3445 {
3446   unsigned i;
3447
3448   /* Only add candidate for the first use in group.  */
3449   for (i = 0; i < data->vgroups.length (); i++)
3450     {
3451       struct iv_group *group = data->vgroups[i];
3452
3453       gcc_assert (group->vuses[0] != NULL);
3454       add_iv_candidate_for_use (data, group->vuses[0]);
3455     }
3456   add_iv_candidate_derived_from_uses (data);
3457 }
3458
3459 /* Record important candidates and add them to related_cands bitmaps.  */
3460
3461 static void
3462 record_important_candidates (struct ivopts_data *data)
3463 {
3464   unsigned i;
3465   struct iv_group *group;
3466
3467   for (i = 0; i < data->vcands.length (); i++)
3468     {
3469       struct iv_cand *cand = data->vcands[i];
3470
3471       if (cand->important)
3472         bitmap_set_bit (data->important_candidates, i);
3473     }
3474
3475   data->consider_all_candidates = (data->vcands.length ()
3476                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3477
3478   /* Add important candidates to groups' related_cands bitmaps.  */
3479   for (i = 0; i < data->vgroups.length (); i++)
3480     {
3481       group = data->vgroups[i];
3482       bitmap_ior_into (group->related_cands, data->important_candidates);
3483     }
3484 }
3485
3486 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3487    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3488    we allocate a simple list to every use.  */
3489
3490 static void
3491 alloc_use_cost_map (struct ivopts_data *data)
3492 {
3493   unsigned i, size, s;
3494
3495   for (i = 0; i < data->vgroups.length (); i++)
3496     {
3497       struct iv_group *group = data->vgroups[i];
3498
3499       if (data->consider_all_candidates)
3500         size = data->vcands.length ();
3501       else
3502         {
3503           s = bitmap_count_bits (group->related_cands);
3504
3505           /* Round up to the power of two, so that moduling by it is fast.  */
3506           size = s ? (1 << ceil_log2 (s)) : 1;
3507         }
3508
3509       group->n_map_members = size;
3510       group->cost_map = XCNEWVEC (struct cost_pair, size);
3511     }
3512 }
3513
3514 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3515    on invariants INV_VARS and that the value used in expressing it is
3516    VALUE, and in case of iv elimination the comparison operator is COMP.  */
3517
3518 static void
3519 set_group_iv_cost (struct ivopts_data *data,
3520                    struct iv_group *group, struct iv_cand *cand,
3521                    comp_cost cost, bitmap inv_vars, tree value,
3522                    enum tree_code comp, bitmap inv_exprs)
3523 {
3524   unsigned i, s;
3525
3526   if (cost.infinite_cost_p ())
3527     {
3528       BITMAP_FREE (inv_vars);
3529       BITMAP_FREE (inv_exprs);
3530       return;
3531     }
3532
3533   if (data->consider_all_candidates)
3534     {
3535       group->cost_map[cand->id].cand = cand;
3536       group->cost_map[cand->id].cost = cost;
3537       group->cost_map[cand->id].inv_vars = inv_vars;
3538       group->cost_map[cand->id].inv_exprs = inv_exprs;
3539       group->cost_map[cand->id].value = value;
3540       group->cost_map[cand->id].comp = comp;
3541       return;
3542     }
3543
3544   /* n_map_members is a power of two, so this computes modulo.  */
3545   s = cand->id & (group->n_map_members - 1);
3546   for (i = s; i < group->n_map_members; i++)
3547     if (!group->cost_map[i].cand)
3548       goto found;
3549   for (i = 0; i < s; i++)
3550     if (!group->cost_map[i].cand)
3551       goto found;
3552
3553   gcc_unreachable ();
3554
3555 found:
3556   group->cost_map[i].cand = cand;
3557   group->cost_map[i].cost = cost;
3558   group->cost_map[i].inv_vars = inv_vars;
3559   group->cost_map[i].inv_exprs = inv_exprs;
3560   group->cost_map[i].value = value;
3561   group->cost_map[i].comp = comp;
3562 }
3563
3564 /* Gets cost of (GROUP, CAND) pair.  */
3565
3566 static struct cost_pair *
3567 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3568                    struct iv_cand *cand)
3569 {
3570   unsigned i, s;
3571   struct cost_pair *ret;
3572
3573   if (!cand)
3574     return NULL;
3575
3576   if (data->consider_all_candidates)
3577     {
3578       ret = group->cost_map + cand->id;
3579       if (!ret->cand)
3580         return NULL;
3581
3582       return ret;
3583     }
3584
3585   /* n_map_members is a power of two, so this computes modulo.  */
3586   s = cand->id & (group->n_map_members - 1);
3587   for (i = s; i < group->n_map_members; i++)
3588     if (group->cost_map[i].cand == cand)
3589       return group->cost_map + i;
3590     else if (group->cost_map[i].cand == NULL)
3591       return NULL;
3592   for (i = 0; i < s; i++)
3593     if (group->cost_map[i].cand == cand)
3594       return group->cost_map + i;
3595     else if (group->cost_map[i].cand == NULL)
3596       return NULL;
3597
3598   return NULL;
3599 }
3600
3601 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3602 static rtx
3603 produce_memory_decl_rtl (tree obj, int *regno)
3604 {
3605   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3606   machine_mode address_mode = targetm.addr_space.address_mode (as);
3607   rtx x;
3608
3609   gcc_assert (obj);
3610   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3611     {
3612       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3613       x = gen_rtx_SYMBOL_REF (address_mode, name);
3614       SET_SYMBOL_REF_DECL (x, obj);
3615       x = gen_rtx_MEM (DECL_MODE (obj), x);
3616       set_mem_addr_space (x, as);
3617       targetm.encode_section_info (obj, x, true);
3618     }
3619   else
3620     {
3621       x = gen_raw_REG (address_mode, (*regno)++);
3622       x = gen_rtx_MEM (DECL_MODE (obj), x);
3623       set_mem_addr_space (x, as);
3624     }
3625
3626   return x;
3627 }
3628
3629 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3630    walk_tree.  DATA contains the actual fake register number.  */
3631
3632 static tree
3633 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3634 {
3635   tree obj = NULL_TREE;
3636   rtx x = NULL_RTX;
3637   int *regno = (int *) data;
3638
3639   switch (TREE_CODE (*expr_p))
3640     {
3641     case ADDR_EXPR:
3642       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3643            handled_component_p (*expr_p);
3644            expr_p = &TREE_OPERAND (*expr_p, 0))
3645         continue;
3646       obj = *expr_p;
3647       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3648         x = produce_memory_decl_rtl (obj, regno);
3649       break;
3650
3651     case SSA_NAME:
3652       *ws = 0;
3653       obj = SSA_NAME_VAR (*expr_p);
3654       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3655       if (!obj)
3656         return NULL_TREE;
3657       if (!DECL_RTL_SET_P (obj))
3658         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3659       break;
3660
3661     case VAR_DECL:
3662     case PARM_DECL:
3663     case RESULT_DECL:
3664       *ws = 0;
3665       obj = *expr_p;
3666
3667       if (DECL_RTL_SET_P (obj))
3668         break;
3669
3670       if (DECL_MODE (obj) == BLKmode)
3671         x = produce_memory_decl_rtl (obj, regno);
3672       else
3673         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3674
3675       break;
3676
3677     default:
3678       break;
3679     }
3680
3681   if (x)
3682     {
3683       decl_rtl_to_reset.safe_push (obj);
3684       SET_DECL_RTL (obj, x);
3685     }
3686
3687   return NULL_TREE;
3688 }
3689
3690 /* Determines cost of the computation of EXPR.  */
3691
3692 static unsigned
3693 computation_cost (tree expr, bool speed)
3694 {
3695   rtx_insn *seq;
3696   rtx rslt;
3697   tree type = TREE_TYPE (expr);
3698   unsigned cost;
3699   /* Avoid using hard regs in ways which may be unsupported.  */
3700   int regno = LAST_VIRTUAL_REGISTER + 1;
3701   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3702   enum node_frequency real_frequency = node->frequency;
3703
3704   node->frequency = NODE_FREQUENCY_NORMAL;
3705   crtl->maybe_hot_insn_p = speed;
3706   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3707   start_sequence ();
3708   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3709   seq = get_insns ();
3710   end_sequence ();
3711   default_rtl_profile ();
3712   node->frequency = real_frequency;
3713
3714   cost = seq_cost (seq, speed);
3715   if (MEM_P (rslt))
3716     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3717                           TYPE_ADDR_SPACE (type), speed);
3718   else if (!REG_P (rslt))
3719     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3720
3721   return cost;
3722 }
3723
3724 /* Returns variable containing the value of candidate CAND at statement AT.  */
3725
3726 static tree
3727 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3728 {
3729   if (stmt_after_increment (loop, cand, stmt))
3730     return cand->var_after;
3731   else
3732     return cand->var_before;
3733 }
3734
3735 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3736    same precision that is at least as wide as the precision of TYPE, stores
3737    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3738    type of A and B.  */
3739
3740 static tree
3741 determine_common_wider_type (tree *a, tree *b)
3742 {
3743   tree wider_type = NULL;
3744   tree suba, subb;
3745   tree atype = TREE_TYPE (*a);
3746
3747   if (CONVERT_EXPR_P (*a))
3748     {
3749       suba = TREE_OPERAND (*a, 0);
3750       wider_type = TREE_TYPE (suba);
3751       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3752         return atype;
3753     }
3754   else
3755     return atype;
3756
3757   if (CONVERT_EXPR_P (*b))
3758     {
3759       subb = TREE_OPERAND (*b, 0);
3760       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3761         return atype;
3762     }
3763   else
3764     return atype;
3765
3766   *a = suba;
3767   *b = subb;
3768   return wider_type;
3769 }
3770
3771 /* Determines the expression by that USE is expressed from induction variable
3772    CAND at statement AT in LOOP.  The expression is stored in two parts in a
3773    decomposed form.  The invariant part is stored in AFF_INV; while variant
3774    part in AFF_VAR.  Store ratio of CAND.step over USE.step in PRAT if it's
3775    non-null.  Returns false if USE cannot be expressed using CAND.  */
3776
3777 static bool
3778 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3779                        struct iv_cand *cand, struct aff_tree *aff_inv,
3780                        struct aff_tree *aff_var, widest_int *prat = NULL)
3781 {
3782   tree ubase = use->iv->base, ustep = use->iv->step;
3783   tree cbase = cand->iv->base, cstep = cand->iv->step;
3784   tree common_type, uutype, var, cstep_common;
3785   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3786   aff_tree aff_cbase;
3787   widest_int rat;
3788
3789   /* We must have a precision to express the values of use.  */
3790   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3791     return false;
3792
3793   var = var_at_stmt (loop, cand, at);
3794   uutype = unsigned_type_for (utype);
3795
3796   /* If the conversion is not noop, perform it.  */
3797   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3798     {
3799       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3800           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3801         {
3802           tree inner_base, inner_step, inner_type;
3803           inner_base = TREE_OPERAND (cbase, 0);
3804           if (CONVERT_EXPR_P (cstep))
3805             inner_step = TREE_OPERAND (cstep, 0);
3806           else
3807             inner_step = cstep;
3808
3809           inner_type = TREE_TYPE (inner_base);
3810           /* If candidate is added from a biv whose type is smaller than
3811              ctype, we know both candidate and the biv won't overflow.
3812              In this case, it's safe to skip the convertion in candidate.
3813              As an example, (unsigned short)((unsigned long)A) equals to
3814              (unsigned short)A, if A has a type no larger than short.  */
3815           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3816             {
3817               cbase = inner_base;
3818               cstep = inner_step;
3819             }
3820         }
3821       cbase = fold_convert (uutype, cbase);
3822       cstep = fold_convert (uutype, cstep);
3823       var = fold_convert (uutype, var);
3824     }
3825
3826   /* Ratio is 1 when computing the value of biv cand by itself.
3827      We can't rely on constant_multiple_of in this case because the
3828      use is created after the original biv is selected.  The call
3829      could fail because of inconsistent fold behavior.  See PR68021
3830      for more information.  */
3831   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3832     {
3833       gcc_assert (is_gimple_assign (use->stmt));
3834       gcc_assert (use->iv->ssa_name == cand->var_after);
3835       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3836       rat = 1;
3837     }
3838   else if (!constant_multiple_of (ustep, cstep, &rat))
3839     return false;
3840
3841   if (prat)
3842     *prat = rat;
3843
3844   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3845      type, we achieve better folding by computing their difference in this
3846      wider type, and cast the result to UUTYPE.  We do not need to worry about
3847      overflows, as all the arithmetics will in the end be performed in UUTYPE
3848      anyway.  */
3849   common_type = determine_common_wider_type (&ubase, &cbase);
3850
3851   /* use = ubase - ratio * cbase + ratio * var.  */
3852   tree_to_aff_combination (ubase, common_type, aff_inv);
3853   tree_to_aff_combination (cbase, common_type, &aff_cbase);
3854   tree_to_aff_combination (var, uutype, aff_var);
3855
3856   /* We need to shift the value if we are after the increment.  */
3857   if (stmt_after_increment (loop, cand, at))
3858     {
3859       aff_tree cstep_aff;
3860
3861       if (common_type != uutype)
3862         cstep_common = fold_convert (common_type, cstep);
3863       else
3864         cstep_common = cstep;
3865
3866       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3867       aff_combination_add (&aff_cbase, &cstep_aff);
3868     }
3869
3870   aff_combination_scale (&aff_cbase, -rat);
3871   aff_combination_add (aff_inv, &aff_cbase);
3872   if (common_type != uutype)
3873     aff_combination_convert (aff_inv, uutype);
3874
3875   aff_combination_scale (aff_var, rat);
3876   return true;
3877 }
3878
3879 /* Determines the expression by that USE is expressed from induction variable
3880    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3881    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3882
3883 static bool
3884 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3885                      struct iv_cand *cand, struct aff_tree *aff)
3886 {
3887   aff_tree aff_var;
3888
3889   if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3890     return false;
3891
3892   aff_combination_add (aff, &aff_var);
3893   return true;
3894 }
3895
3896 /* Return the type of USE.  */
3897
3898 static tree
3899 get_use_type (struct iv_use *use)
3900 {
3901   tree base_type = TREE_TYPE (use->iv->base);
3902   tree type;
3903
3904   if (use->type == USE_ADDRESS)
3905     {
3906       /* The base_type may be a void pointer.  Create a pointer type based on
3907          the mem_ref instead.  */
3908       type = build_pointer_type (TREE_TYPE (*use->op_p));
3909       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3910                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3911     }
3912   else
3913     type = base_type;
3914
3915   return type;
3916 }
3917
3918 /* Determines the expression by that USE is expressed from induction variable
3919    CAND at statement AT in LOOP.  The computation is unshared.  */
3920
3921 static tree
3922 get_computation_at (struct loop *loop, gimple *at,
3923                     struct iv_use *use, struct iv_cand *cand)
3924 {
3925   aff_tree aff;
3926   tree type = get_use_type (use);
3927
3928   if (!get_computation_aff (loop, at, use, cand, &aff))
3929     return NULL_TREE;
3930   unshare_aff_combination (&aff);
3931   return fold_convert (type, aff_combination_to_tree (&aff));
3932 }
3933
3934 /* Adjust the cost COST for being in loop setup rather than loop body.
3935    If we're optimizing for space, the loop setup overhead is constant;
3936    if we're optimizing for speed, amortize it over the per-iteration cost.
3937    If ROUND_UP_P is true, the result is round up rather than to zero when
3938    optimizing for speed.  */
3939 static unsigned
3940 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3941                    bool round_up_p = false)
3942 {
3943   if (cost == INFTY)
3944     return cost;
3945   else if (optimize_loop_for_speed_p (data->current_loop))
3946     {
3947       HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3948       return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3949     }
3950   else
3951     return cost;
3952 }
3953
3954 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3955    EXPR operand holding the shift.  COST0 and COST1 are the costs for
3956    calculating the operands of EXPR.  Returns true if successful, and returns
3957    the cost in COST.  */
3958
3959 static bool
3960 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
3961                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3962 {
3963   comp_cost res;
3964   tree op1 = TREE_OPERAND (expr, 1);
3965   tree cst = TREE_OPERAND (mult, 1);
3966   tree multop = TREE_OPERAND (mult, 0);
3967   int m = exact_log2 (int_cst_value (cst));
3968   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3969   int as_cost, sa_cost;
3970   bool mult_in_op1;
3971
3972   if (!(m >= 0 && m < maxm))
3973     return false;
3974
3975   STRIP_NOPS (op1);
3976   mult_in_op1 = operand_equal_p (op1, mult, 0);
3977
3978   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3979
3980   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3981      use that in preference to a shift insn followed by an add insn.  */
3982   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3983              ? shiftadd_cost (speed, mode, m)
3984              : (mult_in_op1
3985                 ? shiftsub1_cost (speed, mode, m)
3986                 : shiftsub0_cost (speed, mode, m)));
3987
3988   res = comp_cost (MIN (as_cost, sa_cost), 0);
3989   res += (mult_in_op1 ? cost0 : cost1);
3990
3991   STRIP_NOPS (multop);
3992   if (!is_gimple_val (multop))
3993     res += force_expr_to_var_cost (multop, speed);
3994
3995   *cost = res;
3996   return true;
3997 }
3998
3999 /* Estimates cost of forcing expression EXPR into a variable.  */
4000
4001 static comp_cost
4002 force_expr_to_var_cost (tree expr, bool speed)
4003 {
4004   static bool costs_initialized = false;
4005   static unsigned integer_cost [2];
4006   static unsigned symbol_cost [2];
4007   static unsigned address_cost [2];
4008   tree op0, op1;
4009   comp_cost cost0, cost1, cost;
4010   machine_mode mode;
4011   scalar_int_mode int_mode;
4012
4013   if (!costs_initialized)
4014     {
4015       tree type = build_pointer_type (integer_type_node);
4016       tree var, addr;
4017       rtx x;
4018       int i;
4019
4020       var = create_tmp_var_raw (integer_type_node, "test_var");
4021       TREE_STATIC (var) = 1;
4022       x = produce_memory_decl_rtl (var, NULL);
4023       SET_DECL_RTL (var, x);
4024
4025       addr = build1 (ADDR_EXPR, type, var);
4026
4027
4028       for (i = 0; i < 2; i++)
4029         {
4030           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4031                                                              2000), i);
4032
4033           symbol_cost[i] = computation_cost (addr, i) + 1;
4034
4035           address_cost[i]
4036             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4037           if (dump_file && (dump_flags & TDF_DETAILS))
4038             {
4039               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4040               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4041               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4042               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4043               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4044               fprintf (dump_file, "\n");
4045             }
4046         }
4047
4048       costs_initialized = true;
4049     }
4050
4051   STRIP_NOPS (expr);
4052
4053   if (SSA_VAR_P (expr))
4054     return no_cost;
4055
4056   if (is_gimple_min_invariant (expr))
4057     {
4058       if (TREE_CODE (expr) == INTEGER_CST)
4059         return comp_cost (integer_cost [speed], 0);
4060
4061       if (TREE_CODE (expr) == ADDR_EXPR)
4062         {
4063           tree obj = TREE_OPERAND (expr, 0);
4064
4065           if (VAR_P (obj)
4066               || TREE_CODE (obj) == PARM_DECL
4067               || TREE_CODE (obj) == RESULT_DECL)
4068             return comp_cost (symbol_cost [speed], 0);
4069         }
4070
4071       return comp_cost (address_cost [speed], 0);
4072     }
4073
4074   switch (TREE_CODE (expr))
4075     {
4076     case POINTER_PLUS_EXPR:
4077     case PLUS_EXPR:
4078     case MINUS_EXPR:
4079     case MULT_EXPR:
4080     case TRUNC_DIV_EXPR:
4081     case BIT_AND_EXPR:
4082     case BIT_IOR_EXPR:
4083     case LSHIFT_EXPR:
4084     case RSHIFT_EXPR:
4085       op0 = TREE_OPERAND (expr, 0);
4086       op1 = TREE_OPERAND (expr, 1);
4087       STRIP_NOPS (op0);
4088       STRIP_NOPS (op1);
4089       break;
4090
4091     CASE_CONVERT:
4092     case NEGATE_EXPR:
4093     case BIT_NOT_EXPR:
4094       op0 = TREE_OPERAND (expr, 0);
4095       STRIP_NOPS (op0);
4096       op1 = NULL_TREE;
4097       break;
4098
4099     default:
4100       /* Just an arbitrary value, FIXME.  */
4101       return comp_cost (target_spill_cost[speed], 0);
4102     }
4103
4104   if (op0 == NULL_TREE
4105       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4106     cost0 = no_cost;
4107   else
4108     cost0 = force_expr_to_var_cost (op0, speed);
4109
4110   if (op1 == NULL_TREE
4111       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4112     cost1 = no_cost;
4113   else
4114     cost1 = force_expr_to_var_cost (op1, speed);
4115
4116   mode = TYPE_MODE (TREE_TYPE (expr));
4117   switch (TREE_CODE (expr))
4118     {
4119     case POINTER_PLUS_EXPR:
4120     case PLUS_EXPR:
4121     case MINUS_EXPR:
4122     case NEGATE_EXPR:
4123       cost = comp_cost (add_cost (speed, mode), 0);
4124       if (TREE_CODE (expr) != NEGATE_EXPR)
4125         {
4126           tree mult = NULL_TREE;
4127           comp_cost sa_cost;
4128           if (TREE_CODE (op1) == MULT_EXPR)
4129             mult = op1;
4130           else if (TREE_CODE (op0) == MULT_EXPR)
4131             mult = op0;
4132
4133           if (mult != NULL_TREE
4134               && is_a <scalar_int_mode> (mode, &int_mode)
4135               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4136               && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4137                                     speed, &sa_cost))
4138             return sa_cost;
4139         }
4140       break;
4141
4142     CASE_CONVERT:
4143       {
4144         tree inner_mode, outer_mode;
4145         outer_mode = TREE_TYPE (expr);
4146         inner_mode = TREE_TYPE (op0);
4147         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4148                                        TYPE_MODE (inner_mode), speed), 0);
4149       }
4150       break;
4151
4152     case MULT_EXPR:
4153       if (cst_and_fits_in_hwi (op0))
4154         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4155                                              mode, speed), 0);
4156       else if (cst_and_fits_in_hwi (op1))
4157         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4158                                              mode, speed), 0);
4159       else
4160         return comp_cost (target_spill_cost [speed], 0);
4161       break;
4162
4163     case TRUNC_DIV_EXPR:
4164       /* Division by power of two is usually cheap, so we allow it.  Forbid
4165          anything else.  */
4166       if (integer_pow2p (TREE_OPERAND (expr, 1)))
4167         cost = comp_cost (add_cost (speed, mode), 0);
4168       else
4169         cost = comp_cost (target_spill_cost[speed], 0);
4170       break;
4171
4172     case BIT_AND_EXPR:
4173     case BIT_IOR_EXPR:
4174     case BIT_NOT_EXPR:
4175     case LSHIFT_EXPR:
4176     case RSHIFT_EXPR:
4177       cost = comp_cost (add_cost (speed, mode), 0);
4178       break;
4179
4180     default:
4181       gcc_unreachable ();
4182     }
4183
4184   cost += cost0;
4185   cost += cost1;
4186   return cost;
4187 }
4188
4189 /* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
4190    invariants the computation depends on.  */
4191
4192 static comp_cost
4193 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4194 {
4195   if (!expr)
4196     return no_cost;
4197
4198   find_inv_vars (data, &expr, inv_vars);
4199   return force_expr_to_var_cost (expr, data->speed);
4200 }
4201
4202 /* Returns cost of auto-modifying address expression in shape base + offset.
4203    AINC_STEP is step size of the address IV.  AINC_OFFSET is offset of the
4204    address expression.  The address expression has ADDR_MODE in addr space
4205    AS.  The memory access has MEM_MODE.  SPEED means we are optimizing for
4206    speed or size.  */
4207
4208 enum ainc_type
4209 {
4210   AINC_PRE_INC,         /* Pre increment.  */
4211   AINC_PRE_DEC,         /* Pre decrement.  */
4212   AINC_POST_INC,        /* Post increment.  */
4213   AINC_POST_DEC,        /* Post decrement.  */
4214   AINC_NONE             /* Also the number of auto increment types.  */
4215 };
4216
4217 struct ainc_cost_data
4218 {
4219   unsigned costs[AINC_NONE];
4220 };
4221
4222 static comp_cost
4223 get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
4224                        machine_mode addr_mode, machine_mode mem_mode,
4225                        addr_space_t as, bool speed)
4226 {
4227   if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4228       && !USE_STORE_PRE_DECREMENT (mem_mode)
4229       && !USE_LOAD_POST_DECREMENT (mem_mode)
4230       && !USE_STORE_POST_DECREMENT (mem_mode)
4231       && !USE_LOAD_PRE_INCREMENT (mem_mode)
4232       && !USE_STORE_PRE_INCREMENT (mem_mode)
4233       && !USE_LOAD_POST_INCREMENT (mem_mode)
4234       && !USE_STORE_POST_INCREMENT (mem_mode))
4235     return infinite_cost;
4236
4237   static vec<ainc_cost_data *> ainc_cost_data_list;
4238   unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4239   if (idx >= ainc_cost_data_list.length ())
4240     {
4241       unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4242
4243       gcc_assert (nsize > idx);
4244       ainc_cost_data_list.safe_grow_cleared (nsize);
4245     }
4246
4247   ainc_cost_data *data = ainc_cost_data_list[idx];
4248   if (data == NULL)
4249     {
4250       rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4251
4252       data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4253       data->costs[AINC_PRE_DEC] = INFTY;
4254       data->costs[AINC_POST_DEC] = INFTY;
4255       data->costs[AINC_PRE_INC] = INFTY;
4256       data->costs[AINC_POST_INC] = INFTY;
4257       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4258           || USE_STORE_PRE_DECREMENT (mem_mode))
4259         {
4260           rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4261
4262           if (memory_address_addr_space_p (mem_mode, addr, as))
4263             data->costs[AINC_PRE_DEC]
4264               = address_cost (addr, mem_mode, as, speed);
4265         }
4266       if (USE_LOAD_POST_DECREMENT (mem_mode)
4267           || USE_STORE_POST_DECREMENT (mem_mode))
4268         {
4269           rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4270
4271           if (memory_address_addr_space_p (mem_mode, addr, as))
4272             data->costs[AINC_POST_DEC]
4273               = address_cost (addr, mem_mode, as, speed);
4274         }
4275       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4276           || USE_STORE_PRE_INCREMENT (mem_mode))
4277         {
4278           rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4279
4280           if (memory_address_addr_space_p (mem_mode, addr, as))
4281             data->costs[AINC_PRE_INC]
4282               = address_cost (addr, mem_mode, as, speed);
4283         }
4284       if (USE_LOAD_POST_INCREMENT (mem_mode)
4285           || USE_STORE_POST_INCREMENT (mem_mode))
4286         {
4287           rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4288
4289           if (memory_address_addr_space_p (mem_mode, addr, as))
4290             data->costs[AINC_POST_INC]
4291               = address_cost (addr, mem_mode, as, speed);
4292         }
4293       ainc_cost_data_list[idx] = data;
4294     }
4295
4296   HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4297   if (ainc_offset == 0 && msize == ainc_step)
4298     return comp_cost (data->costs[AINC_POST_INC], 0);
4299   if (ainc_offset == 0 && msize == -ainc_step)
4300     return comp_cost (data->costs[AINC_POST_DEC], 0);
4301   if (ainc_offset == msize && msize == ainc_step)
4302     return comp_cost (data->costs[AINC_PRE_INC], 0);
4303   if (ainc_offset == -msize && msize == -ainc_step)
4304     return comp_cost (data->costs[AINC_PRE_DEC], 0);
4305
4306   return infinite_cost;
4307 }
4308
4309 /* Return cost of computing USE's address expression by using CAND.
4310    AFF_INV and AFF_VAR represent invariant and variant parts of the
4311    address expression, respectively.  If AFF_INV is simple, store
4312    the loop invariant variables which are depended by it in INV_VARS;
4313    if AFF_INV is complicated, handle it as a new invariant expression
4314    and record it in INV_EXPR.  RATIO indicates multiple times between
4315    steps of USE and CAND.  If CAN_AUTOINC is nonNULL, store boolean
4316    value to it indicating if this is an auto-increment address.  */
4317
4318 static comp_cost
4319 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4320                   struct iv_cand *cand, aff_tree *aff_inv,
4321                   aff_tree *aff_var, HOST_WIDE_INT ratio,
4322                   bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4323                   bool *can_autoinc, bool speed)
4324 {
4325   rtx addr;
4326   bool simple_inv = true;
4327   tree comp_inv = NULL_TREE, type = aff_var->type;
4328   comp_cost var_cost = no_cost, cost = no_cost;
4329   struct mem_address parts = {NULL_TREE, integer_one_node,
4330                               NULL_TREE, NULL_TREE, NULL_TREE};
4331   machine_mode addr_mode = TYPE_MODE (type);
4332   machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4333   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4334   /* Only true if ratio != 1.  */
4335   bool ok_with_ratio_p = false;
4336   bool ok_without_ratio_p = false;
4337
4338   if (!aff_combination_const_p (aff_inv))
4339     {
4340       parts.index = integer_one_node;
4341       /* Addressing mode "base + index".  */
4342       ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4343       if (ratio != 1)
4344         {
4345           parts.step = wide_int_to_tree (type, ratio);
4346           /* Addressing mode "base + index << scale".  */
4347           ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4348           if (!ok_with_ratio_p)
4349             parts.step = NULL_TREE;
4350         }
4351       if (ok_with_ratio_p || ok_without_ratio_p)
4352         {
4353           if (aff_inv->offset != 0)
4354             {
4355               parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4356               /* Addressing mode "base + index [<< scale] + offset".  */
4357               if (!valid_mem_ref_p (mem_mode, as, &parts))
4358                 parts.offset = NULL_TREE;
4359               else
4360                 aff_inv->offset = 0;
4361             }
4362
4363           move_fixed_address_to_symbol (&parts, aff_inv);
4364           /* Base is fixed address and is moved to symbol part.  */
4365           if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4366             parts.base = NULL_TREE;
4367
4368           /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
4369           if (parts.symbol != NULL_TREE
4370               && !valid_mem_ref_p (mem_mode, as, &parts))
4371             {
4372               aff_combination_add_elt (aff_inv, parts.symbol, 1);
4373               parts.symbol = NULL_TREE;
4374               /* Reset SIMPLE_INV since symbol address needs to be computed
4375                  outside of address expression in this case.  */
4376               simple_inv = false;
4377               /* Symbol part is moved back to base part, it can't be NULL.  */
4378               parts.base = integer_one_node;
4379             }
4380         }
4381       else
4382         parts.index = NULL_TREE;
4383     }
4384   else
4385     {
4386       if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
4387         {
4388           HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
4389           HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();
4390
4391           if (stmt_after_increment (data->current_loop, cand, use->stmt))
4392             ainc_offset += ainc_step;
4393           cost = get_address_cost_ainc (ainc_step, ainc_offset,
4394                                         addr_mode, mem_mode, as, speed);
4395           if (!cost.infinite_cost_p ())
4396             {
4397               *can_autoinc = true;
4398               return cost;
4399             }
4400           cost = no_cost;
4401         }
4402       if (!aff_combination_zero_p (aff_inv))
4403         {
4404           parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4405           /* Addressing mode "base + offset".  */
4406           if (!valid_mem_ref_p (mem_mode, as, &parts))
4407             parts.offset = NULL_TREE;
4408           else
4409             aff_inv->offset = 0;
4410         }
4411     }
4412
4413   if (simple_inv)
4414     simple_inv = (aff_inv == NULL
4415                   || aff_combination_const_p (aff_inv)
4416                   || aff_combination_singleton_var_p (aff_inv));
4417   if (!aff_combination_zero_p (aff_inv))
4418     comp_inv = aff_combination_to_tree (aff_inv);
4419   if (comp_inv != NULL_TREE)
4420     cost = force_var_cost (data, comp_inv, inv_vars);
4421   if (ratio != 1 && parts.step == NULL_TREE)
4422     var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4423   if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4424     var_cost += add_cost (speed, addr_mode);
4425
4426   if (comp_inv && inv_expr && !simple_inv)
4427     {
4428       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4429       /* Clear depends on.  */
4430       if (*inv_expr != NULL && inv_vars && *inv_vars)
4431         bitmap_clear (*inv_vars);
4432
4433       /* Cost of small invariant expression adjusted against loop niters
4434          is usually zero, which makes it difficult to be differentiated
4435          from candidate based on loop invariant variables.  Secondly, the
4436          generated invariant expression may not be hoisted out of loop by
4437          following pass.  We penalize the cost by rounding up in order to
4438          neutralize such effects.  */
4439       cost.cost = adjust_setup_cost (data, cost.cost, true);
4440       cost.scratch = cost.cost;
4441     }
4442
4443   cost += var_cost;
4444   addr = addr_for_mem_ref (&parts, as, false);
4445   gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4446   cost += address_cost (addr, mem_mode, as, speed);
4447
4448   if (parts.symbol != NULL_TREE)
4449     cost.complexity += 1;
4450   /* Don't increase the complexity of adding a scaled index if it's
4451      the only kind of index that the target allows.  */
4452   if (parts.step != NULL_TREE && ok_without_ratio_p)
4453     cost.complexity += 1;
4454   if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4455     cost.complexity += 1;
4456   if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4457     cost.complexity += 1;
4458
4459   return cost;
4460 }
4461
4462 /* Scale (multiply) the computed COST (except scratch part that should be
4463    hoisted out a loop) by header->frequency / AT->frequency, which makes
4464    expected cost more accurate.  */
4465
4466 static comp_cost
4467 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4468 {
4469    int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4470    int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4471    if (loop_freq != 0)
4472      {
4473        gcc_assert (cost.scratch <= cost.cost);
4474        int scaled_cost
4475          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4476
4477        if (dump_file && (dump_flags & TDF_DETAILS))
4478          fprintf (dump_file, "Scaling cost based on bb prob "
4479                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4480                   1.0f * bb_freq / loop_freq, cost.cost,
4481                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4482
4483        cost.cost = scaled_cost;
4484      }
4485
4486   return cost;
4487 }
4488
4489 /* Determines the cost of the computation by that USE is expressed
4490    from induction variable CAND.  If ADDRESS_P is true, we just need
4491    to create an address from it, otherwise we want to get it into
4492    register.  A set of invariants we depend on is stored in INV_VARS.
4493    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4494    addressing is likely.  If INV_EXPR is nonnull, record invariant
4495    expr entry in it.  */
4496
4497 static comp_cost
4498 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4499                       struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4500                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4501 {
4502   gimple *at = use->stmt;
4503   tree ubase = use->iv->base, cbase = cand->iv->base;
4504   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4505   tree comp_inv = NULL_TREE;
4506   HOST_WIDE_INT ratio, aratio;
4507   comp_cost cost;
4508   widest_int rat;
4509   aff_tree aff_inv, aff_var;
4510   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4511
4512   if (inv_vars)
4513     *inv_vars = NULL;
4514   if (can_autoinc)
4515     *can_autoinc = false;
4516   if (inv_expr)
4517     *inv_expr = NULL;
4518
4519   /* Check if we have enough precision to express the values of use.  */
4520   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4521     return infinite_cost;
4522
4523   if (address_p
4524       || (use->iv->base_object
4525           && cand->iv->base_object
4526           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4527           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4528     {
4529       /* Do not try to express address of an object with computation based
4530          on address of a different object.  This may cause problems in rtl
4531          level alias analysis (that does not expect this to be happening,
4532          as this is illegal in C), and would be unlikely to be useful
4533          anyway.  */
4534       if (use->iv->base_object
4535           && cand->iv->base_object
4536           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4537         return infinite_cost;
4538     }
4539
4540   if (!get_computation_aff_1 (data->current_loop, at, use,
4541                               cand, &aff_inv, &aff_var, &rat)
4542       || !wi::fits_shwi_p (rat))
4543     return infinite_cost;
4544
4545   ratio = rat.to_shwi ();
4546   if (address_p)
4547     {
4548       cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4549                                inv_vars, inv_expr, can_autoinc, speed);
4550       return get_scaled_computation_cost_at (data, at, cost);
4551     }
4552
4553   bool simple_inv = (aff_combination_const_p (&aff_inv)
4554                      || aff_combination_singleton_var_p (&aff_inv));
4555   tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4556   aff_combination_convert (&aff_inv, signed_type);
4557   if (!aff_combination_zero_p (&aff_inv))
4558     comp_inv = aff_combination_to_tree (&aff_inv);
4559
4560   cost = force_var_cost (data, comp_inv, inv_vars);
4561   if (comp_inv && inv_expr && !simple_inv)
4562     {
4563       *inv_expr = get_loop_invariant_expr (data, comp_inv);
4564       /* Clear depends on.  */
4565       if (*inv_expr != NULL && inv_vars && *inv_vars)
4566         bitmap_clear (*inv_vars);
4567
4568       cost.cost = adjust_setup_cost (data, cost.cost);
4569       /* Record setup cost in scratch field.  */
4570       cost.scratch = cost.cost;
4571     }
4572   /* Cost of constant integer can be covered when adding invariant part to
4573      variant part.  */
4574   else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4575     cost = no_cost;
4576
4577   /* Need type narrowing to represent use with cand.  */
4578   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4579     {
4580       machine_mode outer_mode = TYPE_MODE (utype);
4581       machine_mode inner_mode = TYPE_MODE (ctype);
4582       cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4583     }
4584
4585   /* Turn a + i * (-c) into a - i * c.  */
4586   if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4587     aratio = -ratio;
4588   else
4589     aratio = ratio;
4590
4591   if (ratio != 1)
4592     cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4593
4594   /* TODO: We may also need to check if we can compute  a + i * 4 in one
4595      instruction.  */
4596   /* Need to add up the invariant and variant parts.  */
4597   if (comp_inv && !integer_zerop (comp_inv))
4598     cost += add_cost (speed, TYPE_MODE (utype));
4599
4600   return get_scaled_computation_cost_at (data, at, cost);
4601 }
4602
4603 /* Determines cost of computing the use in GROUP with CAND in a generic
4604    expression.  */
4605
4606 static bool
4607 determine_group_iv_cost_generic (struct ivopts_data *data,
4608                                  struct iv_group *group, struct iv_cand *cand)
4609 {
4610   comp_cost cost;
4611   iv_inv_expr_ent *inv_expr = NULL;
4612   bitmap inv_vars = NULL, inv_exprs = NULL;
4613   struct iv_use *use = group->vuses[0];
4614
4615   /* The simple case first -- if we need to express value of the preserved
4616      original biv, the cost is 0.  This also prevents us from counting the
4617      cost of increment twice -- once at this use and once in the cost of
4618      the candidate.  */
4619   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4620     cost = no_cost;
4621   else
4622     cost = get_computation_cost (data, use, cand, false,
4623                                  &inv_vars, NULL, &inv_expr);
4624
4625   if (inv_expr)
4626     {
4627       inv_exprs = BITMAP_ALLOC (NULL);
4628       bitmap_set_bit (inv_exprs, inv_expr->id);
4629     }
4630   set_group_iv_cost (data, group, cand, cost, inv_vars,
4631                      NULL_TREE, ERROR_MARK, inv_exprs);
4632   return !cost.infinite_cost_p ();
4633 }
4634
4635 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
4636
4637 static bool
4638 determine_group_iv_cost_address (struct ivopts_data *data,
4639                                  struct iv_group *group, struct iv_cand *cand)
4640 {
4641   unsigned i;
4642   bitmap inv_vars = NULL, inv_exprs = NULL;
4643   bool can_autoinc;
4644   iv_inv_expr_ent *inv_expr = NULL;
4645   struct iv_use *use = group->vuses[0];
4646   comp_cost sum_cost = no_cost, cost;
4647
4648   cost = get_computation_cost (data, use, cand, true,
4649                                &inv_vars, &can_autoinc, &inv_expr);
4650
4651   if (inv_expr)
4652     {
4653       inv_exprs = BITMAP_ALLOC (NULL);
4654       bitmap_set_bit (inv_exprs, inv_expr->id);
4655     }
4656   sum_cost = cost;
4657   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4658     {
4659       if (can_autoinc)
4660         sum_cost -= cand->cost_step;
4661       /* If we generated the candidate solely for exploiting autoincrement
4662          opportunities, and it turns out it can't be used, set the cost to
4663          infinity to make sure we ignore it.  */
4664       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4665         sum_cost = infinite_cost;
4666     }
4667
4668   /* Uses in a group can share setup code, so only add setup cost once.  */
4669   cost -= cost.scratch;
4670   /* Compute and add costs for rest uses of this group.  */
4671   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4672     {
4673       struct iv_use *next = group->vuses[i];
4674
4675       /* TODO: We could skip computing cost for sub iv_use when it has the
4676          same cost as the first iv_use, but the cost really depends on the
4677          offset and where the iv_use is.  */
4678         cost = get_computation_cost (data, next, cand, true,
4679                                      NULL, &can_autoinc, &inv_expr);
4680         if (inv_expr)
4681           {
4682             if (!inv_exprs)
4683               inv_exprs = BITMAP_ALLOC (NULL);
4684
4685             bitmap_set_bit (inv_exprs, inv_expr->id);
4686           }
4687       sum_cost += cost;
4688     }
4689   set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4690                      NULL_TREE, ERROR_MARK, inv_exprs);
4691
4692   return !sum_cost.infinite_cost_p ();
4693 }
4694
4695 /* Computes value of candidate CAND at position AT in iteration NITER, and
4696    stores it to VAL.  */
4697
4698 static void
4699 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4700                aff_tree *val)
4701 {
4702   aff_tree step, delta, nit;
4703   struct iv *iv = cand->iv;
4704   tree type = TREE_TYPE (iv->base);
4705   tree steptype;
4706   if (POINTER_TYPE_P (type))
4707     steptype = sizetype;
4708   else
4709     steptype = unsigned_type_for (type);
4710
4711   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4712   aff_combination_convert (&step, steptype);
4713   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4714   aff_combination_convert (&nit, steptype);
4715   aff_combination_mult (&nit, &step, &delta);
4716   if (stmt_after_increment (loop, cand, at))
4717     aff_combination_add (&delta, &step);
4718
4719   tree_to_aff_combination (iv->base, type, val);
4720   if (!POINTER_TYPE_P (type))
4721     aff_combination_convert (val, steptype);
4722   aff_combination_add (val, &delta);
4723 }
4724
4725 /* Returns period of induction variable iv.  */
4726
4727 static tree
4728 iv_period (struct iv *iv)
4729 {
4730   tree step = iv->step, period, type;
4731   tree pow2div;
4732
4733   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4734
4735   type = unsigned_type_for (TREE_TYPE (step));
4736   /* Period of the iv is lcm (step, type_range)/step -1,
4737      i.e., N*type_range/step - 1. Since type range is power
4738      of two, N == (step >> num_of_ending_zeros_binary (step),
4739      so the final result is
4740
4741        (type_range >> num_of_ending_zeros_binary (step)) - 1
4742
4743   */
4744   pow2div = num_ending_zeros (step);
4745
4746   period = build_low_bits_mask (type,
4747                                 (TYPE_PRECISION (type)
4748                                  - tree_to_uhwi (pow2div)));
4749
4750   return period;
4751 }
4752
4753 /* Returns the comparison operator used when eliminating the iv USE.  */
4754
4755 static enum tree_code
4756 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4757 {
4758   struct loop *loop = data->current_loop;
4759   basic_block ex_bb;
4760   edge exit;
4761
4762   ex_bb = gimple_bb (use->stmt);
4763   exit = EDGE_SUCC (ex_bb, 0);
4764   if (flow_bb_inside_loop_p (loop, exit->dest))
4765     exit = EDGE_SUCC (ex_bb, 1);
4766
4767   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4768 }
4769
4770 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4771    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4772    calculation is performed in non-wrapping type.
4773
4774    TODO: More generally, we could test for the situation that
4775          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4776          This would require knowing the sign of OFFSET.  */
4777
4778 static bool
4779 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4780 {
4781   enum tree_code code;
4782   tree e1, e2;
4783   aff_tree aff_e1, aff_e2, aff_offset;
4784
4785   if (!nowrap_type_p (TREE_TYPE (base)))
4786     return false;
4787
4788   base = expand_simple_operations (base);
4789
4790   if (TREE_CODE (base) == SSA_NAME)
4791     {
4792       gimple *stmt = SSA_NAME_DEF_STMT (base);
4793
4794       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4795         return false;
4796
4797       code = gimple_assign_rhs_code (stmt);
4798       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4799         return false;
4800
4801       e1 = gimple_assign_rhs1 (stmt);
4802       e2 = gimple_assign_rhs2 (stmt);
4803     }
4804   else
4805     {
4806       code = TREE_CODE (base);
4807       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4808         return false;
4809       e1 = TREE_OPERAND (base, 0);
4810       e2 = TREE_OPERAND (base, 1);
4811     }
4812
4813   /* Use affine expansion as deeper inspection to prove the equality.  */
4814   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4815                                   &aff_e2, &data->name_expansion_cache);
4816   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4817                                   &aff_offset, &data->name_expansion_cache);
4818   aff_combination_scale (&aff_offset, -1);
4819   switch (code)
4820     {
4821     case PLUS_EXPR:
4822       aff_combination_add (&aff_e2, &aff_offset);
4823       if (aff_combination_zero_p (&aff_e2))
4824         return true;
4825
4826       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4827                                       &aff_e1, &data->name_expansion_cache);
4828       aff_combination_add (&aff_e1, &aff_offset);
4829       return aff_combination_zero_p (&aff_e1);
4830
4831     case POINTER_PLUS_EXPR:
4832       aff_combination_add (&aff_e2, &aff_offset);
4833       return aff_combination_zero_p (&aff_e2);
4834
4835     default:
4836       return false;
4837     }
4838 }
4839
4840 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4841    comparison with CAND.  NITER describes the number of iterations of
4842    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4843
4844    We aim to handle the following situation:
4845
4846    sometype *base, *p;
4847    int a, b, i;
4848
4849    i = a;
4850    p = p_0 = base + a;
4851
4852    do
4853      {
4854        bla (*p);
4855        p++;
4856        i++;
4857      }
4858    while (i < b);
4859
4860    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4861    We aim to optimize this to
4862
4863    p = p_0 = base + a;
4864    do
4865      {
4866        bla (*p);
4867        p++;
4868      }
4869    while (p < p_0 - a + b);
4870
4871    This preserves the correctness, since the pointer arithmetics does not
4872    overflow.  More precisely:
4873
4874    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4875       overflow in computing it or the values of p.
4876    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4877       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4878
4879 static bool
4880 iv_elimination_compare_lt (struct ivopts_data *data,
4881                            struct iv_cand *cand, enum tree_code *comp_p,
4882                            struct tree_niter_desc *niter)
4883 {
4884   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4885   struct aff_tree nit, tmpa, tmpb;
4886   enum tree_code comp;
4887   HOST_WIDE_INT step;
4888
4889   /* We need to know that the candidate induction variable does not overflow.
4890      While more complex analysis may be used to prove this, for now just
4891      check that the variable appears in the original program and that it
4892      is computed in a type that guarantees no overflows.  */
4893   cand_type = TREE_TYPE (cand->iv->base);
4894   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4895     return false;
4896
4897   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4898      the calculation of the BOUND could overflow, making the comparison
4899      invalid.  */
4900   if (!data->loop_single_exit_p)
4901     return false;
4902
4903   /* We need to be able to decide whether candidate is increasing or decreasing
4904      in order to choose the right comparison operator.  */
4905   if (!cst_and_fits_in_hwi (cand->iv->step))
4906     return false;
4907   step = int_cst_value (cand->iv->step);
4908
4909   /* Check that the number of iterations matches the expected pattern:
4910      a + 1 > b ? 0 : b - a - 1.  */
4911   mbz = niter->may_be_zero;
4912   if (TREE_CODE (mbz) == GT_EXPR)
4913     {
4914       /* Handle a + 1 > b.  */
4915       tree op0 = TREE_OPERAND (mbz, 0);
4916       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4917         {
4918           a = TREE_OPERAND (op0, 0);
4919           b = TREE_OPERAND (mbz, 1);
4920         }
4921       else
4922         return false;
4923     }
4924   else if (TREE_CODE (mbz) == LT_EXPR)
4925     {
4926       tree op1 = TREE_OPERAND (mbz, 1);
4927
4928       /* Handle b < a + 1.  */
4929       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4930         {
4931           a = TREE_OPERAND (op1, 0);
4932           b = TREE_OPERAND (mbz, 0);
4933         }
4934       else
4935         return false;
4936     }
4937   else
4938     return false;
4939
4940   /* Expected number of iterations is B - A - 1.  Check that it matches
4941      the actual number, i.e., that B - A - NITER = 1.  */
4942   tree_to_aff_combination (niter->niter, nit_type, &nit);
4943   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4944   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4945   aff_combination_scale (&nit, -1);
4946   aff_combination_scale (&tmpa, -1);
4947   aff_combination_add (&tmpb, &tmpa);
4948   aff_combination_add (&tmpb, &nit);
4949   if (tmpb.n != 0 || tmpb.offset != 1)
4950     return false;
4951
4952   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4953      overflow.  */
4954   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4955                         cand->iv->step,
4956                         fold_convert (TREE_TYPE (cand->iv->step), a));
4957   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4958     return false;
4959
4960   /* Determine the new comparison operator.  */
4961   comp = step < 0 ? GT_EXPR : LT_EXPR;
4962   if (*comp_p == NE_EXPR)
4963     *comp_p = comp;
4964   else if (*comp_p == EQ_EXPR)
4965     *comp_p = invert_tree_comparison (comp, false);
4966   else
4967     gcc_unreachable ();
4968
4969   return true;
4970 }
4971
4972 /* Check whether it is possible to express the condition in USE by comparison
4973    of candidate CAND.  If so, store the value compared with to BOUND, and the
4974    comparison operator to COMP.  */
4975
4976 static bool
4977 may_eliminate_iv (struct ivopts_data *data,
4978                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4979                   enum tree_code *comp)
4980 {
4981   basic_block ex_bb;
4982   edge exit;
4983   tree period;
4984   struct loop *loop = data->current_loop;
4985   aff_tree bnd;
4986   struct tree_niter_desc *desc = NULL;
4987
4988   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4989     return false;
4990
4991   /* For now works only for exits that dominate the loop latch.
4992      TODO: extend to other conditions inside loop body.  */
4993   ex_bb = gimple_bb (use->stmt);
4994   if (use->stmt != last_stmt (ex_bb)
4995       || gimple_code (use->stmt) != GIMPLE_COND
4996       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4997     return false;
4998
4999   exit = EDGE_SUCC (ex_bb, 0);
5000   if (flow_bb_inside_loop_p (loop, exit->dest))
5001     exit = EDGE_SUCC (ex_bb, 1);
5002   if (flow_bb_inside_loop_p (loop, exit->dest))
5003     return false;
5004
5005   desc = niter_for_exit (data, exit);
5006   if (!desc)
5007     return false;
5008
5009   /* Determine whether we can use the variable to test the exit condition.
5010      This is the case iff the period of the induction variable is greater
5011      than the number of iterations for which the exit condition is true.  */
5012   period = iv_period (cand->iv);
5013
5014   /* If the number of iterations is constant, compare against it directly.  */
5015   if (TREE_CODE (desc->niter) == INTEGER_CST)
5016     {
5017       /* See cand_value_at.  */
5018       if (stmt_after_increment (loop, cand, use->stmt))
5019         {
5020           if (!tree_int_cst_lt (desc->niter, period))
5021             return false;
5022         }
5023       else
5024         {
5025           if (tree_int_cst_lt (period, desc->niter))
5026             return false;
5027         }
5028     }
5029
5030   /* If not, and if this is the only possible exit of the loop, see whether
5031      we can get a conservative estimate on the number of iterations of the
5032      entire loop and compare against that instead.  */
5033   else
5034     {
5035       widest_int period_value, max_niter;
5036
5037       max_niter = desc->max;
5038       if (stmt_after_increment (loop, cand, use->stmt))
5039         max_niter += 1;
5040       period_value = wi::to_widest (period);
5041       if (wi::gtu_p (max_niter, period_value))
5042         {
5043           /* See if we can take advantage of inferred loop bound
5044              information.  */
5045           if (data->loop_single_exit_p)
5046             {
5047               if (!max_loop_iterations (loop, &max_niter))
5048                 return false;
5049               /* The loop bound is already adjusted by adding 1.  */
5050               if (wi::gtu_p (max_niter, period_value))
5051                 return false;
5052             }
5053           else
5054             return false;
5055         }
5056     }
5057
5058   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5059
5060   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5061                          aff_combination_to_tree (&bnd));
5062   *comp = iv_elimination_compare (data, use);
5063
5064   /* It is unlikely that computing the number of iterations using division
5065      would be more profitable than keeping the original induction variable.  */
5066   if (expression_expensive_p (*bound))
5067     return false;
5068
5069   /* Sometimes, it is possible to handle the situation that the number of
5070      iterations may be zero unless additional assumptions by using <
5071      instead of != in the exit condition.
5072
5073      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5074            base the exit condition on it.  However, that is often too
5075            expensive.  */
5076   if (!integer_zerop (desc->may_be_zero))
5077     return iv_elimination_compare_lt (data, cand, comp, desc);
5078
5079   return true;
5080 }
5081
5082  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5083     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5084
5085 static int
5086 parm_decl_cost (struct ivopts_data *data, tree bound)
5087 {
5088   tree sbound = bound;
5089   STRIP_NOPS (sbound);
5090
5091   if (TREE_CODE (sbound) == SSA_NAME
5092       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5093       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5094       && data->body_includes_call)
5095     return COSTS_N_INSNS (1);
5096
5097   return 0;
5098 }
5099
5100 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5101
5102 static bool
5103 determine_group_iv_cost_cond (struct ivopts_data *data,
5104                               struct iv_group *group, struct iv_cand *cand)
5105 {
5106   tree bound = NULL_TREE;
5107   struct iv *cmp_iv;
5108   bitmap inv_exprs = NULL;
5109   bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5110   comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5111   enum comp_iv_rewrite rewrite_type;
5112   iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5113   tree *control_var, *bound_cst;
5114   enum tree_code comp = ERROR_MARK;
5115   struct iv_use *use = group->vuses[0];
5116
5117   /* Extract condition operands.  */
5118   rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5119                                         &bound_cst, NULL, &cmp_iv);
5120   gcc_assert (rewrite_type != COMP_IV_NA);
5121
5122   /* Try iv elimination.  */
5123   if (rewrite_type == COMP_IV_ELIM
5124       && may_eliminate_iv (data, use, cand, &bound, &comp))
5125     {
5126       elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5127       if (elim_cost.cost == 0)
5128         elim_cost.cost = parm_decl_cost (data, bound);
5129       else if (TREE_CODE (bound) == INTEGER_CST)
5130         elim_cost.cost = 0;
5131       /* If we replace a loop condition 'i < n' with 'p < base + n',
5132          inv_vars_elim will have 'base' and 'n' set, which implies that both
5133          'base' and 'n' will be live during the loop.    More likely,
5134          'base + n' will be loop invariant, resulting in only one live value
5135          during the loop.  So in that case we clear inv_vars_elim and set
5136          inv_expr_elim instead.  */
5137       if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5138         {
5139           inv_expr_elim = get_loop_invariant_expr (data, bound);
5140           bitmap_clear (inv_vars_elim);
5141         }
5142       /* The bound is a loop invariant, so it will be only computed
5143          once.  */
5144       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5145     }
5146
5147   /* When the condition is a comparison of the candidate IV against
5148      zero, prefer this IV.
5149
5150      TODO: The constant that we're subtracting from the cost should
5151      be target-dependent.  This information should be added to the
5152      target costs for each backend.  */
5153   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5154       && integer_zerop (*bound_cst)
5155       && (operand_equal_p (*control_var, cand->var_after, 0)
5156           || operand_equal_p (*control_var, cand->var_before, 0)))
5157     elim_cost -= 1;
5158
5159   express_cost = get_computation_cost (data, use, cand, false,
5160                                        &inv_vars_express, NULL,
5161                                        &inv_expr_express);
5162   if (cmp_iv != NULL)
5163     find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5164
5165   /* Count the cost of the original bound as well.  */
5166   bound_cost = force_var_cost (data, *bound_cst, NULL);
5167   if (bound_cost.cost == 0)
5168     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5169   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5170     bound_cost.cost = 0;
5171   express_cost += bound_cost;
5172
5173   /* Choose the better approach, preferring the eliminated IV. */
5174   if (elim_cost <= express_cost)
5175     {
5176       cost = elim_cost;
5177       inv_vars = inv_vars_elim;
5178       inv_vars_elim = NULL;
5179       inv_expr = inv_expr_elim;
5180     }
5181   else
5182     {
5183       cost = express_cost;
5184       inv_vars = inv_vars_express;
5185       inv_vars_express = NULL;
5186       bound = NULL_TREE;
5187       comp = ERROR_MARK;
5188       inv_expr = inv_expr_express;
5189     }
5190
5191   if (inv_expr)
5192     {
5193       inv_exprs = BITMAP_ALLOC (NULL);
5194       bitmap_set_bit (inv_exprs, inv_expr->id);
5195     }
5196   set_group_iv_cost (data, group, cand, cost,
5197                      inv_vars, bound, comp, inv_exprs);
5198
5199   if (inv_vars_elim)
5200     BITMAP_FREE (inv_vars_elim);
5201   if (inv_vars_express)
5202     BITMAP_FREE (inv_vars_express);
5203
5204   return !cost.infinite_cost_p ();
5205 }
5206
5207 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5208    if USE cannot be represented with CAND.  */
5209
5210 static bool
5211 determine_group_iv_cost (struct ivopts_data *data,
5212                          struct iv_group *group, struct iv_cand *cand)
5213 {
5214   switch (group->type)
5215     {
5216     case USE_NONLINEAR_EXPR:
5217       return determine_group_iv_cost_generic (data, group, cand);
5218
5219     case USE_ADDRESS:
5220       return determine_group_iv_cost_address (data, group, cand);
5221
5222     case USE_COMPARE:
5223       return determine_group_iv_cost_cond (data, group, cand);
5224
5225     default:
5226       gcc_unreachable ();
5227     }
5228 }
5229
5230 /* Return true if get_computation_cost indicates that autoincrement is
5231    a possibility for the pair of USE and CAND, false otherwise.  */
5232
5233 static bool
5234 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5235                            struct iv_cand *cand)
5236 {
5237   if (use->type != USE_ADDRESS)
5238     return false;
5239
5240   bool can_autoinc = false;
5241   get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5242   return can_autoinc;
5243 }
5244
5245 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5246    use that allows autoincrement, and set their AINC_USE if possible.  */
5247
5248 static void
5249 set_autoinc_for_original_candidates (struct ivopts_data *data)
5250 {
5251   unsigned i, j;
5252
5253   for (i = 0; i < data->vcands.length (); i++)
5254     {
5255       struct iv_cand *cand = data->vcands[i];
5256       struct iv_use *closest_before = NULL;
5257       struct iv_use *closest_after = NULL;
5258       if (cand->pos != IP_ORIGINAL)
5259         continue;
5260
5261       for (j = 0; j < data->vgroups.length (); j++)
5262         {
5263           struct iv_group *group = data->vgroups[j];
5264           struct iv_use *use = group->vuses[0];
5265           unsigned uid = gimple_uid (use->stmt);
5266
5267           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5268             continue;
5269
5270           if (uid < gimple_uid (cand->incremented_at)
5271               && (closest_before == NULL
5272                   || uid > gimple_uid (closest_before->stmt)))
5273             closest_before = use;
5274
5275           if (uid > gimple_uid (cand->incremented_at)
5276               && (closest_after == NULL
5277                   || uid < gimple_uid (closest_after->stmt)))
5278             closest_after = use;
5279         }
5280
5281       if (closest_before != NULL
5282           && autoinc_possible_for_pair (data, closest_before, cand))
5283         cand->ainc_use = closest_before;
5284       else if (closest_after != NULL
5285                && autoinc_possible_for_pair (data, closest_after, cand))
5286         cand->ainc_use = closest_after;
5287     }
5288 }
5289
5290 /* Relate compare use with all candidates.  */
5291
5292 static void
5293 relate_compare_use_with_all_cands (struct ivopts_data *data)
5294 {
5295   unsigned i, count = data->vcands.length ();
5296   for (i = 0; i < data->vgroups.length (); i++)
5297     {
5298       struct iv_group *group = data->vgroups[i];
5299
5300       if (group->type == USE_COMPARE)
5301         bitmap_set_range (group->related_cands, 0, count);
5302     }
5303 }
5304
5305 /* Finds the candidates for the induction variables.  */
5306
5307 static void
5308 find_iv_candidates (struct ivopts_data *data)
5309 {
5310   /* Add commonly used ivs.  */
5311   add_standard_iv_candidates (data);
5312
5313   /* Add old induction variables.  */
5314   add_iv_candidate_for_bivs (data);
5315
5316   /* Add induction variables derived from uses.  */
5317   add_iv_candidate_for_groups (data);
5318
5319   set_autoinc_for_original_candidates (data);
5320
5321   /* Record the important candidates.  */
5322   record_important_candidates (data);
5323
5324   /* Relate compare iv_use with all candidates.  */
5325   if (!data->consider_all_candidates)
5326     relate_compare_use_with_all_cands (data);
5327
5328   if (dump_file && (dump_flags & TDF_DETAILS))
5329     {
5330       unsigned i;
5331
5332       fprintf (dump_file, "\n<Important Candidates>:\t");
5333       for (i = 0; i < data->vcands.length (); i++)
5334         if (data->vcands[i]->important)
5335           fprintf (dump_file, " %d,", data->vcands[i]->id);
5336       fprintf (dump_file, "\n");
5337
5338       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5339       for (i = 0; i < data->vgroups.length (); i++)
5340         {
5341           struct iv_group *group = data->vgroups[i];
5342
5343           if (group->related_cands)
5344             {
5345               fprintf (dump_file, "  Group %d:\t", group->id);
5346               dump_bitmap (dump_file, group->related_cands);
5347             }
5348         }
5349       fprintf (dump_file, "\n");
5350     }
5351 }
5352
5353 /* Determines costs of computing use of iv with an iv candidate.  */
5354
5355 static void
5356 determine_group_iv_costs (struct ivopts_data *data)
5357 {
5358   unsigned i, j;
5359   struct iv_cand *cand;
5360   struct iv_group *group;
5361   bitmap to_clear = BITMAP_ALLOC (NULL);
5362
5363   alloc_use_cost_map (data);
5364
5365   for (i = 0; i < data->vgroups.length (); i++)
5366     {
5367       group = data->vgroups[i];
5368
5369       if (data->consider_all_candidates)
5370         {
5371           for (j = 0; j < data->vcands.length (); j++)
5372             {
5373               cand = data->vcands[j];
5374               determine_group_iv_cost (data, group, cand);
5375             }
5376         }
5377       else
5378         {
5379           bitmap_iterator bi;
5380
5381           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5382             {
5383               cand = data->vcands[j];
5384               if (!determine_group_iv_cost (data, group, cand))
5385                 bitmap_set_bit (to_clear, j);
5386             }
5387
5388           /* Remove the candidates for that the cost is infinite from
5389              the list of related candidates.  */
5390           bitmap_and_compl_into (group->related_cands, to_clear);
5391           bitmap_clear (to_clear);
5392         }
5393     }
5394
5395   BITMAP_FREE (to_clear);
5396
5397   if (dump_file && (dump_flags & TDF_DETAILS))
5398     {
5399       bitmap_iterator bi;
5400
5401       /* Dump invariant variables.  */
5402       fprintf (dump_file, "\n<Invariant Vars>:\n");
5403       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5404         {
5405           struct version_info *info = ver_info (data, i);
5406           if (info->inv_id)
5407             {
5408               fprintf (dump_file, "Inv %d:\t", info->inv_id);
5409               print_generic_expr (dump_file, info->name, TDF_SLIM);
5410               fprintf (dump_file, "%s\n",
5411                        info->has_nonlin_use ? "" : "\t(eliminable)");
5412             }
5413         }
5414
5415       /* Dump invariant expressions.  */
5416       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5417       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5418
5419       for (hash_table<iv_inv_expr_hasher>::iterator it
5420            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5421            ++it)
5422         list.safe_push (*it);
5423
5424       list.qsort (sort_iv_inv_expr_ent);
5425
5426       for (i = 0; i < list.length (); ++i)
5427         {
5428           fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5429           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5430           fprintf (dump_file, "\n");
5431         }
5432
5433       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5434
5435       for (i = 0; i < data->vgroups.length (); i++)
5436         {
5437           group = data->vgroups[i];
5438
5439           fprintf (dump_file, "Group %d:\n", i);
5440           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5441           for (j = 0; j < group->n_map_members; j++)
5442             {
5443               if (!group->cost_map[j].cand
5444                   || group->cost_map[j].cost.infinite_cost_p ())
5445                 continue;
5446
5447               fprintf (dump_file, "  %d\t%d\t%d\t",
5448                        group->cost_map[j].cand->id,
5449                        group->cost_map[j].cost.cost,
5450                        group->cost_map[j].cost.complexity);
5451               if (!group->cost_map[j].inv_exprs
5452                   || bitmap_empty_p (group->cost_map[j].inv_exprs))
5453                 fprintf (dump_file, "NIL;\t");
5454               else
5455                 bitmap_print (dump_file,
5456                               group->cost_map[j].inv_exprs, "", ";\t");
5457               if (!group->cost_map[j].inv_vars
5458                   || bitmap_empty_p (group->cost_map[j].inv_vars))
5459                 fprintf (dump_file, "NIL;\n");
5460               else
5461                 bitmap_print (dump_file,
5462                               group->cost_map[j].inv_vars, "", "\n");
5463             }
5464
5465           fprintf (dump_file, "\n");
5466         }
5467       fprintf (dump_file, "\n");
5468     }
5469 }
5470
5471 /* Determines cost of the candidate CAND.  */
5472
5473 static void
5474 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5475 {
5476   comp_cost cost_base;
5477   unsigned cost, cost_step;
5478   tree base;
5479
5480   gcc_assert (cand->iv != NULL);
5481
5482   /* There are two costs associated with the candidate -- its increment
5483      and its initialization.  The second is almost negligible for any loop
5484      that rolls enough, so we take it just very little into account.  */
5485
5486   base = cand->iv->base;
5487   cost_base = force_var_cost (data, base, NULL);
5488   /* It will be exceptional that the iv register happens to be initialized with
5489      the proper value at no cost.  In general, there will at least be a regcopy
5490      or a const set.  */
5491   if (cost_base.cost == 0)
5492     cost_base.cost = COSTS_N_INSNS (1);
5493   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5494
5495   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5496
5497   /* Prefer the original ivs unless we may gain something by replacing it.
5498      The reason is to make debugging simpler; so this is not relevant for
5499      artificial ivs created by other optimization passes.  */
5500   if (cand->pos != IP_ORIGINAL
5501       || !SSA_NAME_VAR (cand->var_before)
5502       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5503     cost++;
5504
5505   /* Prefer not to insert statements into latch unless there are some
5506      already (so that we do not create unnecessary jumps).  */
5507   if (cand->pos == IP_END
5508       && empty_block_p (ip_end_pos (data->current_loop)))
5509     cost++;
5510
5511   cand->cost = cost;
5512   cand->cost_step = cost_step;
5513 }
5514
5515 /* Determines costs of computation of the candidates.  */
5516
5517 static void
5518 determine_iv_costs (struct ivopts_data *data)
5519 {
5520   unsigned i;
5521
5522   if (dump_file && (dump_flags & TDF_DETAILS))
5523     {
5524       fprintf (dump_file, "<Candidate Costs>:\n");
5525       fprintf (dump_file, "  cand\tcost\n");
5526     }
5527
5528   for (i = 0; i < data->vcands.length (); i++)
5529     {
5530       struct iv_cand *cand = data->vcands[i];
5531
5532       determine_iv_cost (data, cand);
5533
5534       if (dump_file && (dump_flags & TDF_DETAILS))
5535         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5536     }
5537
5538   if (dump_file && (dump_flags & TDF_DETAILS))
5539     fprintf (dump_file, "\n");
5540 }
5541
5542 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5543    induction variables.  Note N_INVS includes both invariant variables and
5544    invariant expressions.  */
5545
5546 static unsigned
5547 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5548                               unsigned n_cands)
5549 {
5550   unsigned cost;
5551   unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5552   unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5553   bool speed = data->speed;
5554
5555   /* If there is a call in the loop body, the call-clobbered registers
5556      are not available for loop invariants.  */
5557   if (data->body_includes_call)
5558     available_regs = available_regs - target_clobbered_regs;
5559
5560   /* If we have enough registers.  */
5561   if (regs_needed + target_res_regs < available_regs)
5562     cost = n_new;
5563   /* If close to running out of registers, try to preserve them.  */
5564   else if (regs_needed <= available_regs)
5565     cost = target_reg_cost [speed] * regs_needed;
5566   /* If we run out of available registers but the number of candidates
5567      does not, we penalize extra registers using target_spill_cost.  */
5568   else if (n_cands <= available_regs)
5569     cost = target_reg_cost [speed] * available_regs
5570            + target_spill_cost [speed] * (regs_needed - available_regs);
5571   /* If the number of candidates runs out available registers, we penalize
5572      extra candidate registers using target_spill_cost * 2.  Because it is
5573      more expensive to spill induction variable than invariant.  */
5574   else
5575     cost = target_reg_cost [speed] * available_regs
5576            + target_spill_cost [speed] * (n_cands - available_regs) * 2
5577            + target_spill_cost [speed] * (regs_needed - n_cands);
5578
5579   /* Finally, add the number of candidates, so that we prefer eliminating
5580      induction variables if possible.  */
5581   return cost + n_cands;
5582 }
5583
5584 /* For each size of the induction variable set determine the penalty.  */
5585
5586 static void
5587 determine_set_costs (struct ivopts_data *data)
5588 {
5589   unsigned j, n;
5590   gphi *phi;
5591   gphi_iterator psi;
5592   tree op;
5593   struct loop *loop = data->current_loop;
5594   bitmap_iterator bi;
5595
5596   if (dump_file && (dump_flags & TDF_DETAILS))
5597     {
5598       fprintf (dump_file, "<Global Costs>:\n");
5599       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5600       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5601       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5602       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5603     }
5604
5605   n = 0;
5606   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5607     {
5608       phi = psi.phi ();
5609       op = PHI_RESULT (phi);
5610
5611       if (virtual_operand_p (op))
5612         continue;
5613
5614       if (get_iv (data, op))
5615         continue;
5616
5617       if (!POINTER_TYPE_P (TREE_TYPE (op))
5618           && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5619         continue;
5620
5621       n++;
5622     }
5623
5624   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5625     {
5626       struct version_info *info = ver_info (data, j);
5627
5628       if (info->inv_id && info->has_nonlin_use)
5629         n++;
5630     }
5631
5632   data->regs_used = n;
5633   if (dump_file && (dump_flags & TDF_DETAILS))
5634     fprintf (dump_file, "  regs_used %d\n", n);
5635
5636   if (dump_file && (dump_flags & TDF_DETAILS))
5637     {
5638       fprintf (dump_file, "  cost for size:\n");
5639       fprintf (dump_file, "  ivs\tcost\n");
5640       for (j = 0; j <= 2 * target_avail_regs; j++)
5641         fprintf (dump_file, "  %d\t%d\n", j,
5642                  ivopts_estimate_reg_pressure (data, 0, j));
5643       fprintf (dump_file, "\n");
5644     }
5645 }
5646
5647 /* Returns true if A is a cheaper cost pair than B.  */
5648
5649 static bool
5650 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5651 {
5652   if (!a)
5653     return false;
5654
5655   if (!b)
5656     return true;
5657
5658   if (a->cost < b->cost)
5659     return true;
5660
5661   if (b->cost < a->cost)
5662     return false;
5663
5664   /* In case the costs are the same, prefer the cheaper candidate.  */
5665   if (a->cand->cost < b->cand->cost)
5666     return true;
5667
5668   return false;
5669 }
5670
5671 /* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
5672    for more expensive, equal and cheaper respectively.  */
5673
5674 static int
5675 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5676 {
5677   if (cheaper_cost_pair (a, b))
5678     return -1;
5679   if (cheaper_cost_pair (b, a))
5680     return 1;
5681
5682   return 0;
5683 }
5684
5685 /* Returns candidate by that USE is expressed in IVS.  */
5686
5687 static struct cost_pair *
5688 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5689 {
5690   return ivs->cand_for_group[group->id];
5691 }
5692
5693 /* Computes the cost field of IVS structure.  */
5694
5695 static void
5696 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5697 {
5698   comp_cost cost = ivs->cand_use_cost;
5699
5700   cost += ivs->cand_cost;
5701   cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5702   ivs->cost = cost;
5703 }
5704
5705 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5706    and IVS.  */
5707
5708 static void
5709 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5710 {
5711   bitmap_iterator bi;
5712   unsigned iid;
5713
5714   if (!invs)
5715     return;
5716
5717   gcc_assert (n_inv_uses != NULL);
5718   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5719     {
5720       n_inv_uses[iid]--;
5721       if (n_inv_uses[iid] == 0)
5722         ivs->n_invs--;
5723     }
5724 }
5725
5726 /* Set USE not to be expressed by any candidate in IVS.  */
5727
5728 static void
5729 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5730                  struct iv_group *group)
5731 {
5732   unsigned gid = group->id, cid;
5733   struct cost_pair *cp;
5734
5735   cp = ivs->cand_for_group[gid];
5736   if (!cp)
5737     return;
5738   cid = cp->cand->id;
5739
5740   ivs->bad_groups++;
5741   ivs->cand_for_group[gid] = NULL;
5742   ivs->n_cand_uses[cid]--;
5743
5744   if (ivs->n_cand_uses[cid] == 0)
5745     {
5746       bitmap_clear_bit (ivs->cands, cid);
5747       ivs->n_cands--;
5748       ivs->cand_cost -= cp->cand->cost;
5749       iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5750       iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5751     }
5752
5753   ivs->cand_use_cost -= cp->cost;
5754   iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5755   iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5756   iv_ca_recount_cost (data, ivs);
5757 }
5758
5759 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5760    IVS.  */
5761
5762 static void
5763 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5764 {
5765   bitmap_iterator bi;
5766   unsigned iid;
5767
5768   if (!invs)
5769     return;
5770
5771   gcc_assert (n_inv_uses != NULL);
5772   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5773     {
5774       n_inv_uses[iid]++;
5775       if (n_inv_uses[iid] == 1)
5776         ivs->n_invs++;
5777     }
5778 }
5779
5780 /* Set cost pair for GROUP in set IVS to CP.  */
5781
5782 static void
5783 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5784               struct iv_group *group, struct cost_pair *cp)
5785 {
5786   unsigned gid = group->id, cid;
5787
5788   if (ivs->cand_for_group[gid] == cp)
5789     return;
5790
5791   if (ivs->cand_for_group[gid])
5792     iv_ca_set_no_cp (data, ivs, group);
5793
5794   if (cp)
5795     {
5796       cid = cp->cand->id;
5797
5798       ivs->bad_groups--;
5799       ivs->cand_for_group[gid] = cp;
5800       ivs->n_cand_uses[cid]++;
5801       if (ivs->n_cand_uses[cid] == 1)
5802         {
5803           bitmap_set_bit (ivs->cands, cid);
5804           ivs->n_cands++;
5805           ivs->cand_cost += cp->cand->cost;
5806           iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5807           iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5808         }
5809
5810       ivs->cand_use_cost += cp->cost;
5811       iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5812       iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5813       iv_ca_recount_cost (data, ivs);
5814     }
5815 }
5816
5817 /* Extend set IVS by expressing USE by some of the candidates in it
5818    if possible.  Consider all important candidates if candidates in
5819    set IVS don't give any result.  */
5820
5821 static void
5822 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5823                struct iv_group *group)
5824 {
5825   struct cost_pair *best_cp = NULL, *cp;
5826   bitmap_iterator bi;
5827   unsigned i;
5828   struct iv_cand *cand;
5829
5830   gcc_assert (ivs->upto >= group->id);
5831   ivs->upto++;
5832   ivs->bad_groups++;
5833
5834   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5835     {
5836       cand = data->vcands[i];
5837       cp = get_group_iv_cost (data, group, cand);
5838       if (cheaper_cost_pair (cp, best_cp))
5839         best_cp = cp;
5840     }
5841
5842   if (best_cp == NULL)
5843     {
5844       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5845         {
5846           cand = data->vcands[i];
5847           cp = get_group_iv_cost (data, group, cand);
5848           if (cheaper_cost_pair (cp, best_cp))
5849             best_cp = cp;
5850         }
5851     }
5852
5853   iv_ca_set_cp (data, ivs, group, best_cp);
5854 }
5855
5856 /* Get cost for assignment IVS.  */
5857
5858 static comp_cost
5859 iv_ca_cost (struct iv_ca *ivs)
5860 {
5861   /* This was a conditional expression but it triggered a bug in
5862      Sun C 5.5.  */
5863   if (ivs->bad_groups)
5864     return infinite_cost;
5865   else
5866     return ivs->cost;
5867 }
5868
5869 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5870    than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
5871    respectively.  */
5872
5873 static int
5874 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5875                     struct iv_group *group, struct cost_pair *old_cp,
5876                     struct cost_pair *new_cp)
5877 {
5878   gcc_assert (old_cp && new_cp && old_cp != new_cp);
5879   unsigned old_n_invs = ivs->n_invs;
5880   iv_ca_set_cp (data, ivs, group, new_cp);
5881   unsigned new_n_invs = ivs->n_invs;
5882   iv_ca_set_cp (data, ivs, group, old_cp);
5883
5884   return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5885 }
5886
5887 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5888    it before NEXT.  */
5889
5890 static struct iv_ca_delta *
5891 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5892                  struct cost_pair *new_cp, struct iv_ca_delta *next)
5893 {
5894   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5895
5896   change->group = group;
5897   change->old_cp = old_cp;
5898   change->new_cp = new_cp;
5899   change->next = next;
5900
5901   return change;
5902 }
5903
5904 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5905    are rewritten.  */
5906
5907 static struct iv_ca_delta *
5908 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5909 {
5910   struct iv_ca_delta *last;
5911
5912   if (!l2)
5913     return l1;
5914
5915   if (!l1)
5916     return l2;
5917
5918   for (last = l1; last->next; last = last->next)
5919     continue;
5920   last->next = l2;
5921
5922   return l1;
5923 }
5924
5925 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5926
5927 static struct iv_ca_delta *
5928 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5929 {
5930   struct iv_ca_delta *act, *next, *prev = NULL;
5931
5932   for (act = delta; act; act = next)
5933     {
5934       next = act->next;
5935       act->next = prev;
5936       prev = act;
5937
5938       std::swap (act->old_cp, act->new_cp);
5939     }
5940
5941   return prev;
5942 }
5943
5944 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5945    reverted instead.  */
5946
5947 static void
5948 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5949                     struct iv_ca_delta *delta, bool forward)
5950 {
5951   struct cost_pair *from, *to;
5952   struct iv_ca_delta *act;
5953
5954   if (!forward)
5955     delta = iv_ca_delta_reverse (delta);
5956
5957   for (act = delta; act; act = act->next)
5958     {
5959       from = act->old_cp;
5960       to = act->new_cp;
5961       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5962       iv_ca_set_cp (data, ivs, act->group, to);
5963     }
5964
5965   if (!forward)
5966     iv_ca_delta_reverse (delta);
5967 }
5968
5969 /* Returns true if CAND is used in IVS.  */
5970
5971 static bool
5972 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5973 {
5974   return ivs->n_cand_uses[cand->id] > 0;
5975 }
5976
5977 /* Returns number of induction variable candidates in the set IVS.  */
5978
5979 static unsigned
5980 iv_ca_n_cands (struct iv_ca *ivs)
5981 {
5982   return ivs->n_cands;
5983 }
5984
5985 /* Free the list of changes DELTA.  */
5986
5987 static void
5988 iv_ca_delta_free (struct iv_ca_delta **delta)
5989 {
5990   struct iv_ca_delta *act, *next;
5991
5992   for (act = *delta; act; act = next)
5993     {
5994       next = act->next;
5995       free (act);
5996     }
5997
5998   *delta = NULL;
5999 }
6000
6001 /* Allocates new iv candidates assignment.  */
6002
6003 static struct iv_ca *
6004 iv_ca_new (struct ivopts_data *data)
6005 {
6006   struct iv_ca *nw = XNEW (struct iv_ca);
6007
6008   nw->upto = 0;
6009   nw->bad_groups = 0;
6010   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6011                                  data->vgroups.length ());
6012   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6013   nw->cands = BITMAP_ALLOC (NULL);
6014   nw->n_cands = 0;
6015   nw->n_invs = 0;
6016   nw->cand_use_cost = no_cost;
6017   nw->cand_cost = 0;
6018   nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6019   nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6020   nw->cost = no_cost;
6021
6022   return nw;
6023 }
6024
6025 /* Free memory occupied by the set IVS.  */
6026
6027 static void
6028 iv_ca_free (struct iv_ca **ivs)
6029 {
6030   free ((*ivs)->cand_for_group);
6031   free ((*ivs)->n_cand_uses);
6032   BITMAP_FREE ((*ivs)->cands);
6033   free ((*ivs)->n_inv_var_uses);
6034   free ((*ivs)->n_inv_expr_uses);
6035   free (*ivs);
6036   *ivs = NULL;
6037 }
6038
6039 /* Dumps IVS to FILE.  */
6040
6041 static void
6042 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6043 {
6044   unsigned i;
6045   comp_cost cost = iv_ca_cost (ivs);
6046
6047   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6048            cost.complexity);
6049   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6050            ivs->cand_cost, ivs->cand_use_cost.cost,
6051            ivs->cand_use_cost.complexity);
6052   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6053
6054   for (i = 0; i < ivs->upto; i++)
6055     {
6056       struct iv_group *group = data->vgroups[i];
6057       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6058       if (cp)
6059         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6060                  group->id, cp->cand->id, cp->cost.cost,
6061                  cp->cost.complexity);
6062       else
6063         fprintf (file, "   group:%d --> ??\n", group->id);
6064     }
6065
6066   const char *pref = "";
6067   fprintf (file, "  invariant variables: ");
6068   for (i = 1; i <= data->max_inv_var_id; i++)
6069     if (ivs->n_inv_var_uses[i])
6070       {
6071         fprintf (file, "%s%d", pref, i);
6072         pref = ", ";
6073       }
6074
6075   pref = "";
6076   fprintf (file, "\n  invariant expressions: ");
6077   for (i = 1; i <= data->max_inv_expr_id; i++)
6078     if (ivs->n_inv_expr_uses[i])
6079       {
6080         fprintf (file, "%s%d", pref, i);
6081         pref = ", ";
6082       }
6083
6084   fprintf (file, "\n\n");
6085 }
6086
6087 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6088    new set, and store differences in DELTA.  Number of induction variables
6089    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6090    the function will try to find a solution with mimimal iv candidates.  */
6091
6092 static comp_cost
6093 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6094               struct iv_cand *cand, struct iv_ca_delta **delta,
6095               unsigned *n_ivs, bool min_ncand)
6096 {
6097   unsigned i;
6098   comp_cost cost;
6099   struct iv_group *group;
6100   struct cost_pair *old_cp, *new_cp;
6101
6102   *delta = NULL;
6103   for (i = 0; i < ivs->upto; i++)
6104     {
6105       group = data->vgroups[i];
6106       old_cp = iv_ca_cand_for_group (ivs, group);
6107
6108       if (old_cp
6109           && old_cp->cand == cand)
6110         continue;
6111
6112       new_cp = get_group_iv_cost (data, group, cand);
6113       if (!new_cp)
6114         continue;
6115
6116       if (!min_ncand)
6117         {
6118           int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6119           /* Skip if new_cp depends on more invariants.  */
6120           if (cmp_invs > 0)
6121             continue;
6122
6123           int cmp_cost = compare_cost_pair (new_cp, old_cp);
6124           /* Skip if new_cp is not cheaper.  */
6125           if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6126             continue;
6127         }
6128
6129       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6130     }
6131
6132   iv_ca_delta_commit (data, ivs, *delta, true);
6133   cost = iv_ca_cost (ivs);
6134   if (n_ivs)
6135     *n_ivs = iv_ca_n_cands (ivs);
6136   iv_ca_delta_commit (data, ivs, *delta, false);
6137
6138   return cost;
6139 }
6140
6141 /* Try narrowing set IVS by removing CAND.  Return the cost of
6142    the new set and store the differences in DELTA.  START is
6143    the candidate with which we start narrowing.  */
6144
6145 static comp_cost
6146 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6147               struct iv_cand *cand, struct iv_cand *start,
6148               struct iv_ca_delta **delta)
6149 {
6150   unsigned i, ci;
6151   struct iv_group *group;
6152   struct cost_pair *old_cp, *new_cp, *cp;
6153   bitmap_iterator bi;
6154   struct iv_cand *cnd;
6155   comp_cost cost, best_cost, acost;
6156
6157   *delta = NULL;
6158   for (i = 0; i < data->vgroups.length (); i++)
6159     {
6160       group = data->vgroups[i];
6161
6162       old_cp = iv_ca_cand_for_group (ivs, group);
6163       if (old_cp->cand != cand)
6164         continue;
6165
6166       best_cost = iv_ca_cost (ivs);
6167       /* Start narrowing with START.  */
6168       new_cp = get_group_iv_cost (data, group, start);
6169
6170       if (data->consider_all_candidates)
6171         {
6172           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6173             {
6174               if (ci == cand->id || (start && ci == start->id))
6175                 continue;
6176
6177               cnd = data->vcands[ci];
6178
6179               cp = get_group_iv_cost (data, group, cnd);
6180               if (!cp)
6181                 continue;
6182
6183               iv_ca_set_cp (data, ivs, group, cp);
6184               acost = iv_ca_cost (ivs);
6185
6186               if (acost < best_cost)
6187                 {
6188                   best_cost = acost;
6189                   new_cp = cp;
6190                 }
6191             }
6192         }
6193       else
6194         {
6195           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6196             {
6197               if (ci == cand->id || (start && ci == start->id))
6198                 continue;
6199
6200               cnd = data->vcands[ci];
6201
6202               cp = get_group_iv_cost (data, group, cnd);
6203               if (!cp)
6204                 continue;
6205
6206               iv_ca_set_cp (data, ivs, group, cp);
6207               acost = iv_ca_cost (ivs);
6208
6209               if (acost < best_cost)
6210                 {
6211                   best_cost = acost;
6212                   new_cp = cp;
6213                 }
6214             }
6215         }
6216       /* Restore to old cp for use.  */
6217       iv_ca_set_cp (data, ivs, group, old_cp);
6218
6219       if (!new_cp)
6220         {
6221           iv_ca_delta_free (delta);
6222           return infinite_cost;
6223         }
6224
6225       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6226     }
6227
6228   iv_ca_delta_commit (data, ivs, *delta, true);
6229   cost = iv_ca_cost (ivs);
6230   iv_ca_delta_commit (data, ivs, *delta, false);
6231
6232   return cost;
6233 }
6234
6235 /* Try optimizing the set of candidates IVS by removing candidates different
6236    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6237    differences in DELTA.  */
6238
6239 static comp_cost
6240 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6241              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6242 {
6243   bitmap_iterator bi;
6244   struct iv_ca_delta *act_delta, *best_delta;
6245   unsigned i;
6246   comp_cost best_cost, acost;
6247   struct iv_cand *cand;
6248
6249   best_delta = NULL;
6250   best_cost = iv_ca_cost (ivs);
6251
6252   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6253     {
6254       cand = data->vcands[i];
6255
6256       if (cand == except_cand)
6257         continue;
6258
6259       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6260
6261       if (acost < best_cost)
6262         {
6263           best_cost = acost;
6264           iv_ca_delta_free (&best_delta);
6265           best_delta = act_delta;
6266         }
6267       else
6268         iv_ca_delta_free (&act_delta);
6269     }
6270
6271   if (!best_delta)
6272     {
6273       *delta = NULL;
6274       return best_cost;
6275     }
6276
6277   /* Recurse to possibly remove other unnecessary ivs.  */
6278   iv_ca_delta_commit (data, ivs, best_delta, true);
6279   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6280   iv_ca_delta_commit (data, ivs, best_delta, false);
6281   *delta = iv_ca_delta_join (best_delta, *delta);
6282   return best_cost;
6283 }
6284
6285 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6286    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6287    the corresponding cost_pair, otherwise just return BEST_CP.  */
6288
6289 static struct cost_pair*
6290 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6291                         unsigned int cand_idx, struct iv_cand *old_cand,
6292                         struct cost_pair *best_cp)
6293 {
6294   struct iv_cand *cand;
6295   struct cost_pair *cp;
6296
6297   gcc_assert (old_cand != NULL && best_cp != NULL);
6298   if (cand_idx == old_cand->id)
6299     return best_cp;
6300
6301   cand = data->vcands[cand_idx];
6302   cp = get_group_iv_cost (data, group, cand);
6303   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6304     return cp;
6305
6306   return best_cp;
6307 }
6308
6309 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6310    which are used by more than one iv uses.  For each of those candidates,
6311    this function tries to represent iv uses under that candidate using
6312    other ones with lower local cost, then tries to prune the new set.
6313    If the new set has lower cost, It returns the new cost after recording
6314    candidate replacement in list DELTA.  */
6315
6316 static comp_cost
6317 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6318                struct iv_ca_delta **delta)
6319 {
6320   bitmap_iterator bi, bj;
6321   unsigned int i, j, k;
6322   struct iv_cand *cand;
6323   comp_cost orig_cost, acost;
6324   struct iv_ca_delta *act_delta, *tmp_delta;
6325   struct cost_pair *old_cp, *best_cp = NULL;
6326
6327   *delta = NULL;
6328   orig_cost = iv_ca_cost (ivs);
6329
6330   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6331     {
6332       if (ivs->n_cand_uses[i] == 1
6333           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6334         continue;
6335
6336       cand = data->vcands[i];
6337
6338       act_delta = NULL;
6339       /*  Represent uses under current candidate using other ones with
6340           lower local cost.  */
6341       for (j = 0; j < ivs->upto; j++)
6342         {
6343           struct iv_group *group = data->vgroups[j];
6344           old_cp = iv_ca_cand_for_group (ivs, group);
6345
6346           if (old_cp->cand != cand)
6347             continue;
6348
6349           best_cp = old_cp;
6350           if (data->consider_all_candidates)
6351             for (k = 0; k < data->vcands.length (); k++)
6352               best_cp = cheaper_cost_with_cand (data, group, k,
6353                                                 old_cp->cand, best_cp);
6354           else
6355             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6356               best_cp = cheaper_cost_with_cand (data, group, k,
6357                                                 old_cp->cand, best_cp);
6358
6359           if (best_cp == old_cp)
6360             continue;
6361
6362           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6363         }
6364       /* No need for further prune.  */
6365       if (!act_delta)
6366         continue;
6367
6368       /* Prune the new candidate set.  */
6369       iv_ca_delta_commit (data, ivs, act_delta, true);
6370       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6371       iv_ca_delta_commit (data, ivs, act_delta, false);
6372       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6373
6374       if (acost < orig_cost)
6375         {
6376           *delta = act_delta;
6377           return acost;
6378         }
6379       else
6380         iv_ca_delta_free (&act_delta);
6381     }
6382
6383   return orig_cost;
6384 }
6385
6386 /* Tries to extend the sets IVS in the best possible way in order to
6387    express the GROUP.  If ORIGINALP is true, prefer candidates from
6388    the original set of IVs, otherwise favor important candidates not
6389    based on any memory object.  */
6390
6391 static bool
6392 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6393                   struct iv_group *group, bool originalp)
6394 {
6395   comp_cost best_cost, act_cost;
6396   unsigned i;
6397   bitmap_iterator bi;
6398   struct iv_cand *cand;
6399   struct iv_ca_delta *best_delta = NULL, *act_delta;
6400   struct cost_pair *cp;
6401
6402   iv_ca_add_group (data, ivs, group);
6403   best_cost = iv_ca_cost (ivs);
6404   cp = iv_ca_cand_for_group (ivs, group);
6405   if (cp)
6406     {
6407       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6408       iv_ca_set_no_cp (data, ivs, group);
6409     }
6410
6411   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6412      first try important candidates not based on any memory object.  Only if
6413      this fails, try the specific ones.  Rationale -- in loops with many
6414      variables the best choice often is to use just one generic biv.  If we
6415      added here many ivs specific to the uses, the optimization algorithm later
6416      would be likely to get stuck in a local minimum, thus causing us to create
6417      too many ivs.  The approach from few ivs to more seems more likely to be
6418      successful -- starting from few ivs, replacing an expensive use by a
6419      specific iv should always be a win.  */
6420   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6421     {
6422       cand = data->vcands[i];
6423
6424       if (originalp && cand->pos !=IP_ORIGINAL)
6425         continue;
6426
6427       if (!originalp && cand->iv->base_object != NULL_TREE)
6428         continue;
6429
6430       if (iv_ca_cand_used_p (ivs, cand))
6431         continue;
6432
6433       cp = get_group_iv_cost (data, group, cand);
6434       if (!cp)
6435         continue;
6436
6437       iv_ca_set_cp (data, ivs, group, cp);
6438       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6439                                true);
6440       iv_ca_set_no_cp (data, ivs, group);
6441       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6442
6443       if (act_cost < best_cost)
6444         {
6445           best_cost = act_cost;
6446
6447           iv_ca_delta_free (&best_delta);
6448           best_delta = act_delta;
6449         }
6450       else
6451         iv_ca_delta_free (&act_delta);
6452     }
6453
6454   if (best_cost.infinite_cost_p ())
6455     {
6456       for (i = 0; i < group->n_map_members; i++)
6457         {
6458           cp = group->cost_map + i;
6459           cand = cp->cand;
6460           if (!cand)
6461             continue;
6462
6463           /* Already tried this.  */
6464           if (cand->important)
6465             {
6466               if (originalp && cand->pos == IP_ORIGINAL)
6467                 continue;
6468               if (!originalp && cand->iv->base_object == NULL_TREE)
6469                 continue;
6470             }
6471
6472           if (iv_ca_cand_used_p (ivs, cand))
6473             continue;
6474
6475           act_delta = NULL;
6476           iv_ca_set_cp (data, ivs, group, cp);
6477           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6478           iv_ca_set_no_cp (data, ivs, group);
6479           act_delta = iv_ca_delta_add (group,
6480                                        iv_ca_cand_for_group (ivs, group),
6481                                        cp, act_delta);
6482
6483           if (act_cost < best_cost)
6484             {
6485               best_cost = act_cost;
6486
6487               if (best_delta)
6488                 iv_ca_delta_free (&best_delta);
6489               best_delta = act_delta;
6490             }
6491           else
6492             iv_ca_delta_free (&act_delta);
6493         }
6494     }
6495
6496   iv_ca_delta_commit (data, ivs, best_delta, true);
6497   iv_ca_delta_free (&best_delta);
6498
6499   return !best_cost.infinite_cost_p ();
6500 }
6501
6502 /* Finds an initial assignment of candidates to uses.  */
6503
6504 static struct iv_ca *
6505 get_initial_solution (struct ivopts_data *data, bool originalp)
6506 {
6507   unsigned i;
6508   struct iv_ca *ivs = iv_ca_new (data);
6509
6510   for (i = 0; i < data->vgroups.length (); i++)
6511     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6512       {
6513         iv_ca_free (&ivs);
6514         return NULL;
6515       }
6516
6517   return ivs;
6518 }
6519
6520 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6521    points to a bool variable, this function tries to break local
6522    optimal fixed-point by replacing candidates in IVS if it's true.  */
6523
6524 static bool
6525 try_improve_iv_set (struct ivopts_data *data,
6526                     struct iv_ca *ivs, bool *try_replace_p)
6527 {
6528   unsigned i, n_ivs;
6529   comp_cost acost, best_cost = iv_ca_cost (ivs);
6530   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6531   struct iv_cand *cand;
6532
6533   /* Try extending the set of induction variables by one.  */
6534   for (i = 0; i < data->vcands.length (); i++)
6535     {
6536       cand = data->vcands[i];
6537
6538       if (iv_ca_cand_used_p (ivs, cand))
6539         continue;
6540
6541       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6542       if (!act_delta)
6543         continue;
6544
6545       /* If we successfully added the candidate and the set is small enough,
6546          try optimizing it by removing other candidates.  */
6547       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6548         {
6549           iv_ca_delta_commit (data, ivs, act_delta, true);
6550           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6551           iv_ca_delta_commit (data, ivs, act_delta, false);
6552           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6553         }
6554
6555       if (acost < best_cost)
6556         {
6557           best_cost = acost;
6558           iv_ca_delta_free (&best_delta);
6559           best_delta = act_delta;
6560         }
6561       else
6562         iv_ca_delta_free (&act_delta);
6563     }
6564
6565   if (!best_delta)
6566     {
6567       /* Try removing the candidates from the set instead.  */
6568       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6569
6570       if (!best_delta && *try_replace_p)
6571         {
6572           *try_replace_p = false;
6573           /* So far candidate selecting algorithm tends to choose fewer IVs
6574              so that it can handle cases in which loops have many variables
6575              but the best choice is often to use only one general biv.  One
6576              weakness is it can't handle opposite cases, in which different
6577              candidates should be chosen with respect to each use.  To solve
6578              the problem, we replace candidates in a manner described by the
6579              comments of iv_ca_replace, thus give general algorithm a chance
6580              to break local optimal fixed-point in these cases.  */
6581           best_cost = iv_ca_replace (data, ivs, &best_delta);
6582         }
6583
6584       if (!best_delta)
6585         return false;
6586     }
6587
6588   iv_ca_delta_commit (data, ivs, best_delta, true);
6589   gcc_assert (best_cost == iv_ca_cost (ivs));
6590   iv_ca_delta_free (&best_delta);
6591   return true;
6592 }
6593
6594 /* Attempts to find the optimal set of induction variables.  We do simple
6595    greedy heuristic -- we try to replace at most one candidate in the selected
6596    solution and remove the unused ivs while this improves the cost.  */
6597
6598 static struct iv_ca *
6599 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6600 {
6601   struct iv_ca *set;
6602   bool try_replace_p = true;
6603
6604   /* Get the initial solution.  */
6605   set = get_initial_solution (data, originalp);
6606   if (!set)
6607     {
6608       if (dump_file && (dump_flags & TDF_DETAILS))
6609         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6610       return NULL;
6611     }
6612
6613   if (dump_file && (dump_flags & TDF_DETAILS))
6614     {
6615       fprintf (dump_file, "Initial set of candidates:\n");
6616       iv_ca_dump (data, dump_file, set);
6617     }
6618
6619   while (try_improve_iv_set (data, set, &try_replace_p))
6620     {
6621       if (dump_file && (dump_flags & TDF_DETAILS))
6622         {
6623           fprintf (dump_file, "Improved to:\n");
6624           iv_ca_dump (data, dump_file, set);
6625         }
6626     }
6627
6628   return set;
6629 }
6630
6631 static struct iv_ca *
6632 find_optimal_iv_set (struct ivopts_data *data)
6633 {
6634   unsigned i;
6635   comp_cost cost, origcost;
6636   struct iv_ca *set, *origset;
6637
6638   /* Determine the cost based on a strategy that starts with original IVs,
6639      and try again using a strategy that prefers candidates not based
6640      on any IVs.  */
6641   origset = find_optimal_iv_set_1 (data, true);
6642   set = find_optimal_iv_set_1 (data, false);
6643
6644   if (!origset && !set)
6645     return NULL;
6646
6647   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6648   cost = set ? iv_ca_cost (set) : infinite_cost;
6649
6650   if (dump_file && (dump_flags & TDF_DETAILS))
6651     {
6652       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6653                origcost.cost, origcost.complexity);
6654       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6655                cost.cost, cost.complexity);
6656     }
6657
6658   /* Choose the one with the best cost.  */
6659   if (origcost <= cost)
6660     {
6661       if (set)
6662         iv_ca_free (&set);
6663       set = origset;
6664     }
6665   else if (origset)
6666     iv_ca_free (&origset);
6667
6668   for (i = 0; i < data->vgroups.length (); i++)
6669     {
6670       struct iv_group *group = data->vgroups[i];
6671       group->selected = iv_ca_cand_for_group (set, group)->cand;
6672     }
6673
6674   return set;
6675 }
6676
6677 /* Creates a new induction variable corresponding to CAND.  */
6678
6679 static void
6680 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6681 {
6682   gimple_stmt_iterator incr_pos;
6683   tree base;
6684   struct iv_use *use;
6685   struct iv_group *group;
6686   bool after = false;
6687
6688   gcc_assert (cand->iv != NULL);
6689
6690   switch (cand->pos)
6691     {
6692     case IP_NORMAL:
6693       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6694       break;
6695
6696     case IP_END:
6697       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6698       after = true;
6699       break;
6700
6701     case IP_AFTER_USE:
6702       after = true;
6703       /* fall through */
6704     case IP_BEFORE_USE:
6705       incr_pos = gsi_for_stmt (cand->incremented_at);
6706       break;
6707
6708     case IP_ORIGINAL:
6709       /* Mark that the iv is preserved.  */
6710       name_info (data, cand->var_before)->preserve_biv = true;
6711       name_info (data, cand->var_after)->preserve_biv = true;
6712
6713       /* Rewrite the increment so that it uses var_before directly.  */
6714       use = find_interesting_uses_op (data, cand->var_after);
6715       group = data->vgroups[use->group_id];
6716       group->selected = cand;
6717       return;
6718     }
6719
6720   gimple_add_tmp_var (cand->var_before);
6721
6722   base = unshare_expr (cand->iv->base);
6723
6724   create_iv (base, unshare_expr (cand->iv->step),
6725              cand->var_before, data->current_loop,
6726              &incr_pos, after, &cand->var_before, &cand->var_after);
6727 }
6728
6729 /* Creates new induction variables described in SET.  */
6730
6731 static void
6732 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6733 {
6734   unsigned i;
6735   struct iv_cand *cand;
6736   bitmap_iterator bi;
6737
6738   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6739     {
6740       cand = data->vcands[i];
6741       create_new_iv (data, cand);
6742     }
6743
6744   if (dump_file && (dump_flags & TDF_DETAILS))
6745     {
6746       fprintf (dump_file, "Selected IV set for loop %d",
6747                data->current_loop->num);
6748       if (data->loop_loc != UNKNOWN_LOCATION)
6749         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6750                  LOCATION_LINE (data->loop_loc));
6751       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6752                avg_loop_niter (data->current_loop));
6753       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6754       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6755         {
6756           cand = data->vcands[i];
6757           dump_cand (dump_file, cand);
6758         }
6759       fprintf (dump_file, "\n");
6760     }
6761 }
6762
6763 /* Rewrites USE (definition of iv used in a nonlinear expression)
6764    using candidate CAND.  */
6765
6766 static void
6767 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6768                             struct iv_use *use, struct iv_cand *cand)
6769 {
6770   gassign *ass;
6771   gimple_stmt_iterator bsi;
6772   tree comp, type = get_use_type (use), tgt;
6773
6774   /* An important special case -- if we are asked to express value of
6775      the original iv by itself, just exit; there is no need to
6776      introduce a new computation (that might also need casting the
6777      variable to unsigned and back).  */
6778   if (cand->pos == IP_ORIGINAL
6779       && cand->incremented_at == use->stmt)
6780     {
6781       tree op = NULL_TREE;
6782       enum tree_code stmt_code;
6783
6784       gcc_assert (is_gimple_assign (use->stmt));
6785       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6786
6787       /* Check whether we may leave the computation unchanged.
6788          This is the case only if it does not rely on other
6789          computations in the loop -- otherwise, the computation
6790          we rely upon may be removed in remove_unused_ivs,
6791          thus leading to ICE.  */
6792       stmt_code = gimple_assign_rhs_code (use->stmt);
6793       if (stmt_code == PLUS_EXPR
6794           || stmt_code == MINUS_EXPR
6795           || stmt_code == POINTER_PLUS_EXPR)
6796         {
6797           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6798             op = gimple_assign_rhs2 (use->stmt);
6799           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6800             op = gimple_assign_rhs1 (use->stmt);
6801         }
6802
6803       if (op != NULL_TREE)
6804         {
6805           if (expr_invariant_in_loop_p (data->current_loop, op))
6806             return;
6807           if (TREE_CODE (op) == SSA_NAME)
6808             {
6809               struct iv *iv = get_iv (data, op);
6810               if (iv != NULL && integer_zerop (iv->step))
6811                 return;
6812             }
6813         }
6814     }
6815
6816   switch (gimple_code (use->stmt))
6817     {
6818     case GIMPLE_PHI:
6819       tgt = PHI_RESULT (use->stmt);
6820
6821       /* If we should keep the biv, do not replace it.  */
6822       if (name_info (data, tgt)->preserve_biv)
6823         return;
6824
6825       bsi = gsi_after_labels (gimple_bb (use->stmt));
6826       break;
6827
6828     case GIMPLE_ASSIGN:
6829       tgt = gimple_assign_lhs (use->stmt);
6830       bsi = gsi_for_stmt (use->stmt);
6831       break;
6832
6833     default:
6834       gcc_unreachable ();
6835     }
6836
6837   aff_tree aff_inv, aff_var;
6838   if (!get_computation_aff_1 (data->current_loop, use->stmt,
6839                               use, cand, &aff_inv, &aff_var))
6840     gcc_unreachable ();
6841
6842   unshare_aff_combination (&aff_inv);
6843   unshare_aff_combination (&aff_var);
6844   /* Prefer CSE opportunity than loop invariant by adding offset at last
6845      so that iv_uses have different offsets can be CSEed.  */
6846   widest_int offset = aff_inv.offset;
6847   aff_inv.offset = 0;
6848
6849   gimple_seq stmt_list = NULL, seq = NULL;
6850   tree comp_op1 = aff_combination_to_tree (&aff_inv);
6851   tree comp_op2 = aff_combination_to_tree (&aff_var);
6852   gcc_assert (comp_op1 && comp_op2);
6853
6854   comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6855   gimple_seq_add_seq (&stmt_list, seq);
6856   comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6857   gimple_seq_add_seq (&stmt_list, seq);
6858
6859   if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6860     std::swap (comp_op1, comp_op2);
6861
6862   if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6863     {
6864       comp = fold_build_pointer_plus (comp_op1,
6865                                       fold_convert (sizetype, comp_op2));
6866       comp = fold_build_pointer_plus (comp,
6867                                       wide_int_to_tree (sizetype, offset));
6868     }
6869   else
6870     {
6871       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6872                           fold_convert (TREE_TYPE (comp_op1), comp_op2));
6873       comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6874                           wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6875     }
6876
6877   comp = fold_convert (type, comp);
6878   if (!valid_gimple_rhs_p (comp)
6879       || (gimple_code (use->stmt) != GIMPLE_PHI
6880           /* We can't allow re-allocating the stmt as it might be pointed
6881              to still.  */
6882           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6883               >= gimple_num_ops (gsi_stmt (bsi)))))
6884     {
6885       comp = force_gimple_operand (comp, &seq, true, NULL);
6886       gimple_seq_add_seq (&stmt_list, seq);
6887       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6888         {
6889           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6890           /* As this isn't a plain copy we have to reset alignment
6891              information.  */
6892           if (SSA_NAME_PTR_INFO (comp))
6893             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6894         }
6895     }
6896
6897   gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6898   if (gimple_code (use->stmt) == GIMPLE_PHI)
6899     {
6900       ass = gimple_build_assign (tgt, comp);
6901       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6902
6903       bsi = gsi_for_stmt (use->stmt);
6904       remove_phi_node (&bsi, false);
6905     }
6906   else
6907     {
6908       gimple_assign_set_rhs_from_tree (&bsi, comp);
6909       use->stmt = gsi_stmt (bsi);
6910     }
6911 }
6912
6913 /* Performs a peephole optimization to reorder the iv update statement with
6914    a mem ref to enable instruction combining in later phases. The mem ref uses
6915    the iv value before the update, so the reordering transformation requires
6916    adjustment of the offset. CAND is the selected IV_CAND.
6917
6918    Example:
6919
6920    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6921    iv2 = iv1 + 1;
6922
6923    if (t < val)      (1)
6924      goto L;
6925    goto Head;
6926
6927
6928    directly propagating t over to (1) will introduce overlapping live range
6929    thus increase register pressure. This peephole transform it into:
6930
6931
6932    iv2 = iv1 + 1;
6933    t = MEM_REF (base, iv2, 8, 8);
6934    if (t < val)
6935      goto L;
6936    goto Head;
6937 */
6938
6939 static void
6940 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6941 {
6942   tree var_after;
6943   gimple *iv_update, *stmt;
6944   basic_block bb;
6945   gimple_stmt_iterator gsi, gsi_iv;
6946
6947   if (cand->pos != IP_NORMAL)
6948     return;
6949
6950   var_after = cand->var_after;
6951   iv_update = SSA_NAME_DEF_STMT (var_after);
6952
6953   bb = gimple_bb (iv_update);
6954   gsi = gsi_last_nondebug_bb (bb);
6955   stmt = gsi_stmt (gsi);
6956
6957   /* Only handle conditional statement for now.  */
6958   if (gimple_code (stmt) != GIMPLE_COND)
6959     return;
6960
6961   gsi_prev_nondebug (&gsi);
6962   stmt = gsi_stmt (gsi);
6963   if (stmt != iv_update)
6964     return;
6965
6966   gsi_prev_nondebug (&gsi);
6967   if (gsi_end_p (gsi))
6968     return;
6969
6970   stmt = gsi_stmt (gsi);
6971   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6972     return;
6973
6974   if (stmt != use->stmt)
6975     return;
6976
6977   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6978     return;
6979
6980   if (dump_file && (dump_flags & TDF_DETAILS))
6981     {
6982       fprintf (dump_file, "Reordering \n");
6983       print_gimple_stmt (dump_file, iv_update, 0);
6984       print_gimple_stmt (dump_file, use->stmt, 0);
6985       fprintf (dump_file, "\n");
6986     }
6987
6988   gsi = gsi_for_stmt (use->stmt);
6989   gsi_iv = gsi_for_stmt (iv_update);
6990   gsi_move_before (&gsi_iv, &gsi);
6991
6992   cand->pos = IP_BEFORE_USE;
6993   cand->incremented_at = use->stmt;
6994 }
6995
6996 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6997
6998 static void
6999 rewrite_use_address (struct ivopts_data *data,
7000                      struct iv_use *use, struct iv_cand *cand)
7001 {
7002   aff_tree aff;
7003   bool ok;
7004
7005   adjust_iv_update_pos (cand, use);
7006   ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7007   gcc_assert (ok);
7008   unshare_aff_combination (&aff);
7009
7010   /* To avoid undefined overflow problems, all IV candidates use unsigned
7011      integer types.  The drawback is that this makes it impossible for
7012      create_mem_ref to distinguish an IV that is based on a memory object
7013      from one that represents simply an offset.
7014
7015      To work around this problem, we pass a hint to create_mem_ref that
7016      indicates which variable (if any) in aff is an IV based on a memory
7017      object.  Note that we only consider the candidate.  If this is not
7018      based on an object, the base of the reference is in some subexpression
7019      of the use -- but these will use pointer types, so they are recognized
7020      by the create_mem_ref heuristics anyway.  */
7021   tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7022   tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7023   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7024   tree type = TREE_TYPE (*use->op_p);
7025   unsigned int align = get_object_alignment (*use->op_p);
7026   if (align != TYPE_ALIGN (type))
7027     type = build_aligned_type (type, align);
7028
7029   tree ref = create_mem_ref (&bsi, type, &aff,
7030                              reference_alias_ptr_type (*use->op_p),
7031                              iv, base_hint, data->speed);
7032
7033   copy_ref_info (ref, *use->op_p);
7034   *use->op_p = ref;
7035 }
7036
7037 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7038    candidate CAND.  */
7039
7040 static void
7041 rewrite_use_compare (struct ivopts_data *data,
7042                      struct iv_use *use, struct iv_cand *cand)
7043 {
7044   tree comp, op, bound;
7045   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7046   enum tree_code compare;
7047   struct iv_group *group = data->vgroups[use->group_id];
7048   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7049
7050   bound = cp->value;
7051   if (bound)
7052     {
7053       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7054       tree var_type = TREE_TYPE (var);
7055       gimple_seq stmts;
7056
7057       if (dump_file && (dump_flags & TDF_DETAILS))
7058         {
7059           fprintf (dump_file, "Replacing exit test: ");
7060           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7061         }
7062       compare = cp->comp;
7063       bound = unshare_expr (fold_convert (var_type, bound));
7064       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7065       if (stmts)
7066         gsi_insert_seq_on_edge_immediate (
7067                 loop_preheader_edge (data->current_loop),
7068                 stmts);
7069
7070       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7071       gimple_cond_set_lhs (cond_stmt, var);
7072       gimple_cond_set_code (cond_stmt, compare);
7073       gimple_cond_set_rhs (cond_stmt, op);
7074       return;
7075     }
7076
7077   /* The induction variable elimination failed; just express the original
7078      giv.  */
7079   comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7080   gcc_assert (comp != NULL_TREE);
7081   gcc_assert (use->op_p != NULL);
7082   *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7083                                          SSA_NAME_VAR (*use->op_p),
7084                                          true, GSI_SAME_STMT);
7085 }
7086
7087 /* Rewrite the groups using the selected induction variables.  */
7088
7089 static void
7090 rewrite_groups (struct ivopts_data *data)
7091 {
7092   unsigned i, j;
7093
7094   for (i = 0; i < data->vgroups.length (); i++)
7095     {
7096       struct iv_group *group = data->vgroups[i];
7097       struct iv_cand *cand = group->selected;
7098
7099       gcc_assert (cand);
7100
7101       if (group->type == USE_NONLINEAR_EXPR)
7102         {
7103           for (j = 0; j < group->vuses.length (); j++)
7104             {
7105               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7106               update_stmt (group->vuses[j]->stmt);
7107             }
7108         }
7109       else if (group->type == USE_ADDRESS)
7110         {
7111           for (j = 0; j < group->vuses.length (); j++)
7112             {
7113               rewrite_use_address (data, group->vuses[j], cand);
7114               update_stmt (group->vuses[j]->stmt);
7115             }
7116         }
7117       else
7118         {
7119           gcc_assert (group->type == USE_COMPARE);
7120
7121           for (j = 0; j < group->vuses.length (); j++)
7122             {
7123               rewrite_use_compare (data, group->vuses[j], cand);
7124               update_stmt (group->vuses[j]->stmt);
7125             }
7126         }
7127     }
7128 }
7129
7130 /* Removes the ivs that are not used after rewriting.  */
7131
7132 static void
7133 remove_unused_ivs (struct ivopts_data *data)
7134 {
7135   unsigned j;
7136   bitmap_iterator bi;
7137   bitmap toremove = BITMAP_ALLOC (NULL);
7138
7139   /* Figure out an order in which to release SSA DEFs so that we don't
7140      release something that we'd have to propagate into a debug stmt
7141      afterwards.  */
7142   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7143     {
7144       struct version_info *info;
7145
7146       info = ver_info (data, j);
7147       if (info->iv
7148           && !integer_zerop (info->iv->step)
7149           && !info->inv_id
7150           && !info->iv->nonlin_use
7151           && !info->preserve_biv)
7152         {
7153           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7154
7155           tree def = info->iv->ssa_name;
7156
7157           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7158             {
7159               imm_use_iterator imm_iter;
7160               use_operand_p use_p;
7161               gimple *stmt;
7162               int count = 0;
7163
7164               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7165                 {
7166                   if (!gimple_debug_bind_p (stmt))
7167                     continue;
7168
7169                   /* We just want to determine whether to do nothing
7170                      (count == 0), to substitute the computed
7171                      expression into a single use of the SSA DEF by
7172                      itself (count == 1), or to use a debug temp
7173                      because the SSA DEF is used multiple times or as
7174                      part of a larger expression (count > 1). */
7175                   count++;
7176                   if (gimple_debug_bind_get_value (stmt) != def)
7177                     count++;
7178
7179                   if (count > 1)
7180                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7181                 }
7182
7183               if (!count)
7184                 continue;
7185
7186               struct iv_use dummy_use;
7187               struct iv_cand *best_cand = NULL, *cand;
7188               unsigned i, best_pref = 0, cand_pref;
7189
7190               memset (&dummy_use, 0, sizeof (dummy_use));
7191               dummy_use.iv = info->iv;
7192               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7193                 {
7194                   cand = data->vgroups[i]->selected;
7195                   if (cand == best_cand)
7196                     continue;
7197                   cand_pref = operand_equal_p (cand->iv->step,
7198                                                info->iv->step, 0)
7199                     ? 4 : 0;
7200                   cand_pref
7201                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7202                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7203                     ? 2 : 0;
7204                   cand_pref
7205                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7206                     ? 1 : 0;
7207                   if (best_cand == NULL || best_pref < cand_pref)
7208                     {
7209                       best_cand = cand;
7210                       best_pref = cand_pref;
7211                     }
7212                 }
7213
7214               if (!best_cand)
7215                 continue;
7216
7217               tree comp = get_computation_at (data->current_loop,
7218                                               SSA_NAME_DEF_STMT (def),
7219                                               &dummy_use, best_cand);
7220               if (!comp)
7221                 continue;
7222
7223               if (count > 1)
7224                 {
7225                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7226                   DECL_ARTIFICIAL (vexpr) = 1;
7227                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7228                   if (SSA_NAME_VAR (def))
7229                     SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7230                   else
7231                     SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7232                   gdebug *def_temp
7233                     = gimple_build_debug_bind (vexpr, comp, NULL);
7234                   gimple_stmt_iterator gsi;
7235
7236                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7237                     gsi = gsi_after_labels (gimple_bb
7238                                             (SSA_NAME_DEF_STMT (def)));
7239                   else
7240                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7241
7242                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7243                   comp = vexpr;
7244                 }
7245
7246               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7247                 {
7248                   if (!gimple_debug_bind_p (stmt))
7249                     continue;
7250
7251                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7252                     SET_USE (use_p, comp);
7253
7254                   update_stmt (stmt);
7255                 }
7256             }
7257         }
7258     }
7259
7260   release_defs_bitset (toremove);
7261
7262   BITMAP_FREE (toremove);
7263 }
7264
7265 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7266    for hash_map::traverse.  */
7267
7268 bool
7269 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7270 {
7271   free (value);
7272   return true;
7273 }
7274
7275 /* Frees data allocated by the optimization of a single loop.  */
7276
7277 static void
7278 free_loop_data (struct ivopts_data *data)
7279 {
7280   unsigned i, j;
7281   bitmap_iterator bi;
7282   tree obj;
7283
7284   if (data->niters)
7285     {
7286       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7287       delete data->niters;
7288       data->niters = NULL;
7289     }
7290
7291   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7292     {
7293       struct version_info *info;
7294
7295       info = ver_info (data, i);
7296       info->iv = NULL;
7297       info->has_nonlin_use = false;
7298       info->preserve_biv = false;
7299       info->inv_id = 0;
7300     }
7301   bitmap_clear (data->relevant);
7302   bitmap_clear (data->important_candidates);
7303
7304   for (i = 0; i < data->vgroups.length (); i++)
7305     {
7306       struct iv_group *group = data->vgroups[i];
7307
7308       for (j = 0; j < group->vuses.length (); j++)
7309         free (group->vuses[j]);
7310       group->vuses.release ();
7311
7312       BITMAP_FREE (group->related_cands);
7313       for (j = 0; j < group->n_map_members; j++)
7314         {
7315           if (group->cost_map[j].inv_vars)
7316             BITMAP_FREE (group->cost_map[j].inv_vars);
7317           if (group->cost_map[j].inv_exprs)
7318             BITMAP_FREE (group->cost_map[j].inv_exprs);
7319         }
7320
7321       free (group->cost_map);
7322       free (group);
7323     }
7324   data->vgroups.truncate (0);
7325
7326   for (i = 0; i < data->vcands.length (); i++)
7327     {
7328       struct iv_cand *cand = data->vcands[i];
7329
7330       if (cand->inv_vars)
7331         BITMAP_FREE (cand->inv_vars);
7332       if (cand->inv_exprs)
7333         BITMAP_FREE (cand->inv_exprs);
7334       free (cand);
7335     }
7336   data->vcands.truncate (0);
7337
7338   if (data->version_info_size < num_ssa_names)
7339     {
7340       data->version_info_size = 2 * num_ssa_names;
7341       free (data->version_info);
7342       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7343     }
7344
7345   data->max_inv_var_id = 0;
7346   data->max_inv_expr_id = 0;
7347
7348   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7349     SET_DECL_RTL (obj, NULL_RTX);
7350
7351   decl_rtl_to_reset.truncate (0);
7352
7353   data->inv_expr_tab->empty ();
7354
7355   data->iv_common_cand_tab->empty ();
7356   data->iv_common_cands.truncate (0);
7357 }
7358
7359 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7360    loop tree.  */
7361
7362 static void
7363 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7364 {
7365   free_loop_data (data);
7366   free (data->version_info);
7367   BITMAP_FREE (data->relevant);
7368   BITMAP_FREE (data->important_candidates);
7369
7370   decl_rtl_to_reset.release ();
7371   data->vgroups.release ();
7372   data->vcands.release ();
7373   delete data->inv_expr_tab;
7374   data->inv_expr_tab = NULL;
7375   free_affine_expand_cache (&data->name_expansion_cache);
7376   delete data->iv_common_cand_tab;
7377   data->iv_common_cand_tab = NULL;
7378   data->iv_common_cands.release ();
7379   obstack_free (&data->iv_obstack, NULL);
7380 }
7381
7382 /* Returns true if the loop body BODY includes any function calls.  */
7383
7384 static bool
7385 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7386 {
7387   gimple_stmt_iterator gsi;
7388   unsigned i;
7389
7390   for (i = 0; i < num_nodes; i++)
7391     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7392       {
7393         gimple *stmt = gsi_stmt (gsi);
7394         if (is_gimple_call (stmt)
7395             && !gimple_call_internal_p (stmt)
7396             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7397           return true;
7398       }
7399   return false;
7400 }
7401
7402 /* Optimizes the LOOP.  Returns true if anything changed.  */
7403
7404 static bool
7405 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7406 {
7407   bool changed = false;
7408   struct iv_ca *iv_ca;
7409   edge exit = single_dom_exit (loop);
7410   basic_block *body;
7411
7412   gcc_assert (!data->niters);
7413   data->current_loop = loop;
7414   data->loop_loc = find_loop_location (loop);
7415   data->speed = optimize_loop_for_speed_p (loop);
7416
7417   if (dump_file && (dump_flags & TDF_DETAILS))
7418     {
7419       fprintf (dump_file, "Processing loop %d", loop->num);
7420       if (data->loop_loc != UNKNOWN_LOCATION)
7421         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7422                  LOCATION_LINE (data->loop_loc));
7423       fprintf (dump_file, "\n");
7424
7425       if (exit)
7426         {
7427           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7428                    exit->src->index, exit->dest->index);
7429           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7430           fprintf (dump_file, "\n");
7431         }
7432
7433       fprintf (dump_file, "\n");
7434     }
7435
7436   body = get_loop_body (loop);
7437   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7438   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7439   free (body);
7440
7441   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7442
7443   /* For each ssa name determines whether it behaves as an induction variable
7444      in some loop.  */
7445   if (!find_induction_variables (data))
7446     goto finish;
7447
7448   /* Finds interesting uses (item 1).  */
7449   find_interesting_uses (data);
7450   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7451     goto finish;
7452
7453   /* Finds candidates for the induction variables (item 2).  */
7454   find_iv_candidates (data);
7455
7456   /* Calculates the costs (item 3, part 1).  */
7457   determine_iv_costs (data);
7458   determine_group_iv_costs (data);
7459   determine_set_costs (data);
7460
7461   /* Find the optimal set of induction variables (item 3, part 2).  */
7462   iv_ca = find_optimal_iv_set (data);
7463   if (!iv_ca)
7464     goto finish;
7465   changed = true;
7466
7467   /* Create the new induction variables (item 4, part 1).  */
7468   create_new_ivs (data, iv_ca);
7469   iv_ca_free (&iv_ca);
7470
7471   /* Rewrite the uses (item 4, part 2).  */
7472   rewrite_groups (data);
7473
7474   /* Remove the ivs that are unused after rewriting.  */
7475   remove_unused_ivs (data);
7476
7477   /* We have changed the structure of induction variables; it might happen
7478      that definitions in the scev database refer to some of them that were
7479      eliminated.  */
7480   scev_reset ();
7481
7482 finish:
7483   free_loop_data (data);
7484
7485   return changed;
7486 }
7487
7488 /* Main entry point.  Optimizes induction variables in loops.  */
7489
7490 void
7491 tree_ssa_iv_optimize (void)
7492 {
7493   struct loop *loop;
7494   struct ivopts_data data;
7495
7496   tree_ssa_iv_optimize_init (&data);
7497
7498   /* Optimize the loops starting with the innermost ones.  */
7499   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7500     {
7501       if (dump_file && (dump_flags & TDF_DETAILS))
7502         flow_loop_dump (loop, dump_file, NULL, 1);
7503
7504       tree_ssa_iv_optimize_loop (&data, loop);
7505     }
7506
7507   tree_ssa_iv_optimize_finalize (&data);
7508 }
7509
7510 #include "gt-tree-ssa-loop-ivopts.h"