/* Induction variable optimizations.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
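
/* An illustrative sketch of the overall effect (an editorial example, not
   taken from this file): ivopts typically rewrites an indexed loop such as

	for (i = 0; i < n; i++)
	  a[i] = 0;

   into a strength-reduced form conceptually equivalent to

	for (p = &a[0]; p < &a[n]; p++)
	  *p = 0;

   replacing the per-iteration address computation a + i * sizeof (*a)
   with a single pointer increment.  */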
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 10000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
129 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
135 return niter;
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
	 && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
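
/* A minimal editorial illustration (not part of the original source):
   the comparison above is lexicographic on (cost, complexity), e.g.

	comp_cost (4, 1) < comp_cost (4, 2)	is true
	comp_cost (4, 9) < comp_cost (5, 0)	is true

   i.e. the runtime cost dominates and complexity only breaks ties.  */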
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariants that have to be
			   preserved.  */
  bitmap inv_exprs;	/* Loop invariant expressions.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;	/* For IP_{BEFORE,AFTER}_USE candidates, the place
				   where it is incremented.  */
  bitmap inv_vars;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness w.r.t. loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_ADDRESS)
	fprintf (file, "  Type:\tADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
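
/* An illustrative sketch (an editorial example, not from the original
   source): for EXPR = (char *) &a[i], the conversion is peeled off first
   and the ADDR_EXPR case then reduces the result to &a converted to
   ptr_type_node, so all ivs walking the same array share one base
   object; a non-pointer EXPR yields NULL_TREE.  */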
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
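
/* For illustration (an editorial example, not from the original source):
   &a is not complex since its operand is a DECL, whereas &a[0], whose
   operand is an ARRAY_REF, is; the sums and differences above propagate
   the answer, so alloc_iv below lowers such bases to affine form.  */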
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
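
/* A small illustration (an editorial example, not from the original
   source): for EXPR = a_1 * 4 + b_2 the recursion walks operands left to
   right and returns a_1, the first ssa name encountered; for an
   invariant such as the constant 16 it returns NULL.  */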
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, but because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, but because that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}
/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base; for uses of other types,
   both are zero by default.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree addr_base,
	    unsigned HOST_WIDE_INT addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);

  data->vgroups.safe_push (group);
  return group;
}
/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   A new group will be created if there is no existing group for the use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  unsigned HOST_WIDE_INT addr_offset = 0;

  /* A non-address type use is always recorded in a new group; for an
     address type use, try to find an existing compatible group first.  */
  if (type == USE_ADDRESS && iv->base_object)
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (use->type != USE_ADDRESS || !use->iv->base_object)
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->nonlin_use = use;
  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
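
/* An illustrative sketch (an editorial example, not from the original
   source): with biv i_1 and the chain

	j_2 = i_1 * 4;
	p_3 = (long) j_2;

   tracing p_3 walks through the cast to j_2, then through the
   multiplication to i_1, and returns the iv describing the biv i_1.  */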
/* Record that BIV, together with its predecessor and successor bivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
2134 /* Return true if memory reference REF with step STEP may be unaligned. */
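/* For illustration: on a strict-alignment target, a 32-bit access whose
address advances by STEP = 2 bytes per iteration has tree_ctz (step) == 1,
and (1 << 1) * BITS_PER_UNIT == 16 bits is less than the required 32-bit
alignment, so we conservatively answer true even if the starting address
happens to be aligned. */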
2136 static bool
2137 may_be_unaligned_p (tree ref, tree step)
2139 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2140 thus they are not misaligned. */
2141 if (TREE_CODE (ref) == TARGET_MEM_REF)
2142 return false;
2144 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2145 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2146 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2148 unsigned HOST_WIDE_INT bitpos;
2149 unsigned int ref_align;
2150 get_object_alignment_1 (ref, &ref_align, &bitpos);
2151 if (ref_align < align
2152 || (bitpos % align) != 0
2153 || (bitpos % BITS_PER_UNIT) != 0)
2154 return true;
2156 unsigned int trailing_zeros = tree_ctz (step);
2157 if (trailing_zeros < HOST_BITS_PER_INT
2158 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2159 return true;
2161 return false;
2164 /* Return true if EXPR may be non-addressable. */
2166 bool
2167 may_be_nonaddressable_p (tree expr)
2169 switch (TREE_CODE (expr))
2171 case TARGET_MEM_REF:
2172 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2173 target, thus they are always addressable. */
2174 return false;
2176 case MEM_REF:
2177 /* Likewise for MEM_REFs, modulo the storage order. */
2178 return REF_REVERSE_STORAGE_ORDER (expr);
2180 case BIT_FIELD_REF:
2181 if (REF_REVERSE_STORAGE_ORDER (expr))
2182 return true;
2183 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2185 case COMPONENT_REF:
2186 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2187 return true;
2188 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2189 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2191 case ARRAY_REF:
2192 case ARRAY_RANGE_REF:
2193 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2194 return true;
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2197 case VIEW_CONVERT_EXPR:
2198 /* This kind of view-conversion may wrap non-addressable objects
2199 and make them look addressable. After some processing the
2200 non-addressability may be uncovered again, causing ADDR_EXPRs
2201 of inappropriate objects to be built. */
2202 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2203 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2204 return true;
2205 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2207 CASE_CONVERT:
2208 return true;
2210 default:
2211 break;
2214 return false;
2217 /* Finds addresses in *OP_P inside STMT. */
2219 static void
2220 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2221 tree *op_p)
2223 tree base = *op_p, step = size_zero_node;
2224 struct iv *civ;
2225 struct ifs_ivopts_data ifs_ivopts_data;
2227 /* Do not play with volatile memory references. A bit too conservative,
2228 perhaps, but safe. */
2229 if (gimple_has_volatile_ops (stmt))
2230 goto fail;
2232 /* Ignore bitfields for now. Not really something terribly complicated
2233 to handle. TODO. */
2234 if (TREE_CODE (base) == BIT_FIELD_REF)
2235 goto fail;
2237 base = unshare_expr (base);
2239 if (TREE_CODE (base) == TARGET_MEM_REF)
2241 tree type = build_pointer_type (TREE_TYPE (base));
2242 tree astep;
2244 if (TMR_BASE (base)
2245 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2247 civ = get_iv (data, TMR_BASE (base));
2248 if (!civ)
2249 goto fail;
2251 TMR_BASE (base) = civ->base;
2252 step = civ->step;
2254 if (TMR_INDEX2 (base)
2255 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2257 civ = get_iv (data, TMR_INDEX2 (base));
2258 if (!civ)
2259 goto fail;
2261 TMR_INDEX2 (base) = civ->base;
2262 step = civ->step;
2264 if (TMR_INDEX (base)
2265 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2267 civ = get_iv (data, TMR_INDEX (base));
2268 if (!civ)
2269 goto fail;
2271 TMR_INDEX (base) = civ->base;
2272 astep = civ->step;
2274 if (astep)
2276 if (TMR_STEP (base))
2277 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2279 step = fold_build2 (PLUS_EXPR, type, step, astep);
2283 if (integer_zerop (step))
2284 goto fail;
2285 base = tree_mem_ref_addr (type, base);
2287 else
2289 ifs_ivopts_data.ivopts_data = data;
2290 ifs_ivopts_data.stmt = stmt;
2291 ifs_ivopts_data.step = size_zero_node;
2292 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2293 || integer_zerop (ifs_ivopts_data.step))
2294 goto fail;
2295 step = ifs_ivopts_data.step;
2297 /* Check that the base expression is addressable. This needs
2298 to be done after substituting bases of IVs into it. */
2299 if (may_be_nonaddressable_p (base))
2300 goto fail;
2302 /* Moreover, on strict alignment platforms, check that it is
2303 sufficiently aligned. */
2304 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2305 goto fail;
2307 base = build_fold_addr_expr (base);
2309 /* Substituting bases of IVs into the base expression might
2310 have caused folding opportunities. */
2311 if (TREE_CODE (base) == ADDR_EXPR)
2313 tree *ref = &TREE_OPERAND (base, 0);
2314 while (handled_component_p (*ref))
2315 ref = &TREE_OPERAND (*ref, 0);
2316 if (TREE_CODE (*ref) == MEM_REF)
2318 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2319 TREE_OPERAND (*ref, 0),
2320 TREE_OPERAND (*ref, 1));
2321 if (tem)
2322 *ref = tem;
2327 civ = alloc_iv (data, base, step);
2328 /* Fail if base object of this memory reference is unknown. */
2329 if (civ->base_object == NULL_TREE)
2330 goto fail;
2332 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2333 return;
2335 fail:
2336 for_each_index (op_p, idx_record_use, data);
2339 /* Finds and records invariants used in STMT. */
2341 static void
2342 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2344 ssa_op_iter iter;
2345 use_operand_p use_p;
2346 tree op;
2348 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2350 op = USE_FROM_PTR (use_p);
2351 record_invariant (data, op, false);
2355 /* Finds interesting uses of induction variables in the statement STMT. */
2357 static void
2358 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2360 struct iv *iv;
2361 tree op, *lhs, *rhs;
2362 ssa_op_iter iter;
2363 use_operand_p use_p;
2364 enum tree_code code;
2366 find_invariants_stmt (data, stmt);
2368 if (gimple_code (stmt) == GIMPLE_COND)
2370 find_interesting_uses_cond (data, stmt);
2371 return;
2374 if (is_gimple_assign (stmt))
2376 lhs = gimple_assign_lhs_ptr (stmt);
2377 rhs = gimple_assign_rhs1_ptr (stmt);
2379 if (TREE_CODE (*lhs) == SSA_NAME)
2381 /* If the statement defines an induction variable, the uses are not
2382 interesting by themselves. */
2384 iv = get_iv (data, *lhs);
2386 if (iv && !integer_zerop (iv->step))
2387 return;
2390 code = gimple_assign_rhs_code (stmt);
2391 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2392 && (REFERENCE_CLASS_P (*rhs)
2393 || is_gimple_val (*rhs)))
2395 if (REFERENCE_CLASS_P (*rhs))
2396 find_interesting_uses_address (data, stmt, rhs);
2397 else
2398 find_interesting_uses_op (data, *rhs);
2400 if (REFERENCE_CLASS_P (*lhs))
2401 find_interesting_uses_address (data, stmt, lhs);
2402 return;
2404 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2406 find_interesting_uses_cond (data, stmt);
2407 return;
2410 /* TODO -- we should also handle address uses of type
2412 memory = call (whatever);
2416 call (memory). */
2419 if (gimple_code (stmt) == GIMPLE_PHI
2420 && gimple_bb (stmt) == data->current_loop->header)
2422 iv = get_iv (data, PHI_RESULT (stmt));
2424 if (iv && !integer_zerop (iv->step))
2425 return;
2428 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2430 op = USE_FROM_PTR (use_p);
2432 if (TREE_CODE (op) != SSA_NAME)
2433 continue;
2435 iv = get_iv (data, op);
2436 if (!iv)
2437 continue;
2439 find_interesting_uses_op (data, op);
2443 /* Finds interesting uses of induction variables outside of loops
2444 on loop exit edge EXIT. */
2446 static void
2447 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2449 gphi *phi;
2450 gphi_iterator psi;
2451 tree def;
2453 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2455 phi = psi.phi ();
2456 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2457 if (!virtual_operand_p (def))
2458 find_interesting_uses_op (data, def);
2462 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2463 mode for memory reference represented by USE. */
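/* For illustration, on a hypothetical target whose [base + offset]
addressing accepts displacements in [-2048, 2047]: an offset of 2044
validates while 2048 is rejected. The "(plus reg offset)" template built
below is cached per (address space, mode) pair and only its offset
operand is replaced on each query. */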
2465 static GTY (()) vec<rtx, va_gc> *addr_list;
2467 static bool
2468 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2470 rtx reg, addr;
2471 unsigned list_index;
2472 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2473 machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2475 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2476 if (list_index >= vec_safe_length (addr_list))
2477 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2479 addr = (*addr_list)[list_index];
2480 if (!addr)
2482 addr_mode = targetm.addr_space.address_mode (as);
2483 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2484 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2485 (*addr_list)[list_index] = addr;
2487 else
2488 addr_mode = GET_MODE (addr);
2490 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2491 return (memory_address_addr_space_p (mem_mode, addr, as));
2494 /* Comparison function to sort a group's uses in ascending order of addr_offset. */
2496 static int
2497 group_compare_offset (const void *a, const void *b)
2499 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2500 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2502 if ((*u1)->addr_offset != (*u2)->addr_offset)
2503 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2504 else
2505 return 0;
2508 /* Check if small groups should be split. Return true if no group
2509 contains more than two uses with distinct addr_offsets. Return
2510 false otherwise. We want to split such groups because:
2512 1) Small groups don't have much benefit and may interfere with
2513 general candidate selection.
2514 2) The size of a problem with only small groups is usually small, and
2515 the general algorithm can handle it well.
2517 TODO -- The above claim may not hold when we want to merge memory
2518 accesses with consecutive addresses. */
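/* For illustration: a group with use offsets {0, 4, 4, 8} has three
distinct offsets and therefore makes this predicate return false, while
groups with offsets {0, 8} and {4, 4} have at most two distinct offsets
each, so the predicate returns true and the groups get split. */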
2520 static bool
2521 split_small_address_groups_p (struct ivopts_data *data)
2523 unsigned int i, j, distinct = 1;
2524 struct iv_use *pre;
2525 struct iv_group *group;
2527 for (i = 0; i < data->vgroups.length (); i++)
2529 group = data->vgroups[i];
2530 if (group->vuses.length () == 1)
2531 continue;
2533 gcc_assert (group->type == USE_ADDRESS);
2534 if (group->vuses.length () == 2)
2536 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2537 std::swap (group->vuses[0], group->vuses[1]);
2539 else
2540 group->vuses.qsort (group_compare_offset);
2542 if (distinct > 2)
2543 continue;
2545 distinct = 1;
2546 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2548 if (group->vuses[j]->addr_offset != pre->addr_offset)
2550 pre = group->vuses[j];
2551 distinct++;
2554 if (distinct > 2)
2555 break;
2559 return (distinct <= 2);
2562 /* For each group of address type uses, this function further groups
2563 these uses according to the maximum offset supported by target's
2564 [base + offset] addressing mode. */
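/* For illustration, assuming a target whose [base + offset] form covers
offsets up to 4095: address uses at base, base + 8 and base + 4096 start
out in one group; the first two stay together, while the last is moved
to a new group because its offset against the first use does not
validate. */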
2566 static void
2567 split_address_groups (struct ivopts_data *data)
2569 unsigned int i, j;
2570 /* Whether to split every group unconditionally. */
2571 bool split_p = split_small_address_groups_p (data);
2573 for (i = 0; i < data->vgroups.length (); i++)
2575 struct iv_group *new_group = NULL;
2576 struct iv_group *group = data->vgroups[i];
2577 struct iv_use *use = group->vuses[0];
2579 use->id = 0;
2580 use->group_id = group->id;
2581 if (group->vuses.length () == 1)
2582 continue;
2584 gcc_assert (group->type == USE_ADDRESS);
2586 for (j = 1; j < group->vuses.length ();)
2588 struct iv_use *next = group->vuses[j];
2589 HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2591 /* Split the group if asked to, or if the offset against the first
2592 use can't fit in the offset part of the addressing mode. IV uses
2593 having the same offset are still kept in one group. */
2594 if (offset != 0
2595 && (split_p || !addr_offset_valid_p (use, offset)))
2597 if (!new_group)
2598 new_group = record_group (data, group->type);
2599 group->vuses.ordered_remove (j);
2600 new_group->vuses.safe_push (next);
2601 continue;
2604 next->id = j;
2605 next->group_id = group->id;
2606 j++;
2611 /* Finds uses of the induction variables that are interesting. */
2613 static void
2614 find_interesting_uses (struct ivopts_data *data)
2616 basic_block bb;
2617 gimple_stmt_iterator bsi;
2618 basic_block *body = get_loop_body (data->current_loop);
2619 unsigned i;
2620 edge e;
2622 for (i = 0; i < data->current_loop->num_nodes; i++)
2624 edge_iterator ei;
2625 bb = body[i];
2627 FOR_EACH_EDGE (e, ei, bb->succs)
2628 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2629 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2630 find_interesting_uses_outside (data, e);
2632 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2633 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2634 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2635 if (!is_gimple_debug (gsi_stmt (bsi)))
2636 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2639 split_address_groups (data);
2641 if (dump_file && (dump_flags & TDF_DETAILS))
2643 bitmap_iterator bi;
2645 fprintf (dump_file, "\n<Invariant Vars>:\n");
2646 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2648 struct version_info *info = ver_info (data, i);
2649 if (info->inv_id)
2651 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2652 print_generic_expr (dump_file, info->name, TDF_SLIM);
2653 fprintf (dump_file, "%s\n",
2654 info->has_nonlin_use ? "" : "\t(eliminable)");
2658 fprintf (dump_file, "\n<IV Groups>:\n");
2659 dump_groups (dump_file, data);
2660 fprintf (dump_file, "\n");
2663 free (body);
2666 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2667 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2668 we are at the top-level of the processed address. */
2670 static tree
2671 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2672 HOST_WIDE_INT *offset)
2674 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2675 enum tree_code code;
2676 tree type, orig_type = TREE_TYPE (expr);
2677 HOST_WIDE_INT off0, off1, st;
2678 tree orig_expr = expr;
2680 STRIP_NOPS (expr);
2682 type = TREE_TYPE (expr);
2683 code = TREE_CODE (expr);
2684 *offset = 0;
2686 switch (code)
2688 case INTEGER_CST:
2689 if (!cst_and_fits_in_hwi (expr)
2690 || integer_zerop (expr))
2691 return orig_expr;
2693 *offset = int_cst_value (expr);
2694 return build_int_cst (orig_type, 0);
2696 case POINTER_PLUS_EXPR:
2697 case PLUS_EXPR:
2698 case MINUS_EXPR:
2699 op0 = TREE_OPERAND (expr, 0);
2700 op1 = TREE_OPERAND (expr, 1);
2702 op0 = strip_offset_1 (op0, false, false, &off0);
2703 op1 = strip_offset_1 (op1, false, false, &off1);
2705 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2706 if (op0 == TREE_OPERAND (expr, 0)
2707 && op1 == TREE_OPERAND (expr, 1))
2708 return orig_expr;
2710 if (integer_zerop (op1))
2711 expr = op0;
2712 else if (integer_zerop (op0))
2714 if (code == MINUS_EXPR)
2715 expr = fold_build1 (NEGATE_EXPR, type, op1);
2716 else
2717 expr = op1;
2719 else
2720 expr = fold_build2 (code, type, op0, op1);
2722 return fold_convert (orig_type, expr);
2724 case MULT_EXPR:
2725 op1 = TREE_OPERAND (expr, 1);
2726 if (!cst_and_fits_in_hwi (op1))
2727 return orig_expr;
2729 op0 = TREE_OPERAND (expr, 0);
2730 op0 = strip_offset_1 (op0, false, false, &off0);
2731 if (op0 == TREE_OPERAND (expr, 0))
2732 return orig_expr;
2734 *offset = off0 * int_cst_value (op1);
2735 if (integer_zerop (op0))
2736 expr = op0;
2737 else
2738 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2740 return fold_convert (orig_type, expr);
2742 case ARRAY_REF:
2743 case ARRAY_RANGE_REF:
2744 if (!inside_addr)
2745 return orig_expr;
2747 step = array_ref_element_size (expr);
2748 if (!cst_and_fits_in_hwi (step))
2749 break;
2751 st = int_cst_value (step);
2752 op1 = TREE_OPERAND (expr, 1);
2753 op1 = strip_offset_1 (op1, false, false, &off1);
2754 *offset = off1 * st;
2756 if (top_compref
2757 && integer_zerop (op1))
2759 /* Strip the component reference completely. */
2760 op0 = TREE_OPERAND (expr, 0);
2761 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2762 *offset += off0;
2763 return op0;
2765 break;
2767 case COMPONENT_REF:
2769 tree field;
2771 if (!inside_addr)
2772 return orig_expr;
2774 tmp = component_ref_field_offset (expr);
2775 field = TREE_OPERAND (expr, 1);
2776 if (top_compref
2777 && cst_and_fits_in_hwi (tmp)
2778 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2780 HOST_WIDE_INT boffset, abs_off;
2782 /* Strip the component reference completely. */
2783 op0 = TREE_OPERAND (expr, 0);
2784 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2785 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2786 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2787 if (boffset < 0)
2788 abs_off = -abs_off;
2790 *offset = off0 + int_cst_value (tmp) + abs_off;
2791 return op0;
2794 break;
2796 case ADDR_EXPR:
2797 op0 = TREE_OPERAND (expr, 0);
2798 op0 = strip_offset_1 (op0, true, true, &off0);
2799 *offset += off0;
2801 if (op0 == TREE_OPERAND (expr, 0))
2802 return orig_expr;
2804 expr = build_fold_addr_expr (op0);
2805 return fold_convert (orig_type, expr);
2807 case MEM_REF:
2808 /* ??? Offset operand? */
2809 inside_addr = false;
2810 break;
2812 default:
2813 return orig_expr;
2816 /* Default handling of expressions for which we want to recurse into
2817 the first operand. */
2818 op0 = TREE_OPERAND (expr, 0);
2819 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2820 *offset += off0;
2822 if (op0 == TREE_OPERAND (expr, 0)
2823 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2824 return orig_expr;
2826 expr = copy_node (expr);
2827 TREE_OPERAND (expr, 0) = op0;
2828 if (op1)
2829 TREE_OPERAND (expr, 1) = op1;
2831 /* Inside address, we might strip the top level component references,
2832 thus changing type of the expression. Handling of ADDR_EXPR
2833 will fix that. */
2834 expr = fold_convert (orig_type, expr);
2836 return expr;
2839 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2841 static tree
2842 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2844 HOST_WIDE_INT off;
2845 tree core = strip_offset_1 (expr, false, false, &off);
2846 *offset = off;
2847 return core;
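/* For illustration: for EXPR = &a[i_2 + 3], with 4-byte array elements,
this returns &a[i_2] and stores 12 to *OFFSET; for EXPR = p_5 + 16 it
returns p_5 and stores 16. */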
2850 /* Returns variant of TYPE that can be used as base for different uses.
2851 We return unsigned type with the same precision, which avoids problems
2852 with overflows. */
2854 static tree
2855 generic_type_for (tree type)
2857 if (POINTER_TYPE_P (type))
2858 return unsigned_type_for (type);
2860 if (TYPE_UNSIGNED (type))
2861 return type;
2863 return unsigned_type_for (type);
2866 /* Private data for walk_tree. */
2868 struct walk_tree_data
2870 bitmap *inv_vars;
2871 struct ivopts_data *idata;
2874 /* Callback function for walk_tree; it records invariants and symbol
2875 references in *EXPR_P. DATA is the structure storing result info. */
2877 static tree
2878 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2880 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2881 struct version_info *info;
2883 if (TREE_CODE (*expr_p) != SSA_NAME)
2884 return NULL_TREE;
2886 info = name_info (wdata->idata, *expr_p);
2887 if (!info->inv_id || info->has_nonlin_use)
2888 return NULL_TREE;
2890 if (!*wdata->inv_vars)
2891 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2892 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2894 return NULL_TREE;
2897 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
2898 should store them. */
2900 static inline void
2901 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2903 struct walk_tree_data wdata;
2905 if (!inv_vars)
2906 return;
2908 wdata.idata = data;
2909 wdata.inv_vars = inv_vars;
2910 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2913 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
2914 entry will be recorded if it doesn't exist yet. Given the two exprs:
2915 inv_expr + cst1, inv_expr + cst2
2916 it's hard to decide whether the constant part should be stripped
2917 or not. We choose not to strip it, based on the following facts:
2918 1) We need to count the ADD cost for the constant part if it's
2919 stripped, which isn't always trivial where this function is called.
2920 2) Stripping the constant away may conflict with the following loop
2921 invariant hoisting pass.
2922 3) Not stripping the constant away results in more invariant exprs,
2923 which usually leads to decisions preferring lower reg pressure. */
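/* For illustration: the invariant expressions a_5 + b_6 + 4 and
a_5 + b_6 + 8 are recorded as two distinct entries; had the constants
been stripped, both would share a single entry for a_5 + b_6 at the
price of accounting an extra addition at each use. */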
2925 static iv_inv_expr_ent *
2926 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2928 STRIP_NOPS (inv_expr);
2930 if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2931 return NULL;
2933 /* Don't strip constant part away as we used to. */
2935 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
2936 struct iv_inv_expr_ent ent;
2937 ent.expr = inv_expr;
2938 ent.hash = iterative_hash_expr (inv_expr, 0);
2939 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2941 if (!*slot)
2943 *slot = XNEW (struct iv_inv_expr_ent);
2944 (*slot)->expr = inv_expr;
2945 (*slot)->hash = ent.hash;
2946 (*slot)->id = ++data->max_inv_expr_id;
2949 return *slot;
2952 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2953 position to POS. If USE is not NULL, the candidate is set as related to
2954 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2955 replacement of the final value of the iv by a direct computation. */
2957 static struct iv_cand *
2958 add_candidate_1 (struct ivopts_data *data,
2959 tree base, tree step, bool important, enum iv_position pos,
2960 struct iv_use *use, gimple *incremented_at,
2961 struct iv *orig_iv = NULL)
2963 unsigned i;
2964 struct iv_cand *cand = NULL;
2965 tree type, orig_type;
2967 gcc_assert (base && step);
2969 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2970 live, but the ivopts code may replace a real pointer with one
2971 pointing before or after the memory block that is then adjusted
2972 into the memory block during the loop. FIXME: It would likely be
2973 better to actually force the pointer live and still use ivopts;
2974 for example, it would be enough to write the pointer into memory
2975 and keep it there until after the loop. */
2976 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2977 return NULL;
2979 /* For non-original variables, make sure their values are computed in a type
2980 that does not invoke undefined behavior on overflows (since in general,
2981 we cannot prove that these induction variables are non-wrapping). */
2982 if (pos != IP_ORIGINAL)
2984 orig_type = TREE_TYPE (base);
2985 type = generic_type_for (orig_type);
2986 if (type != orig_type)
2988 base = fold_convert (type, base);
2989 step = fold_convert (type, step);
2993 for (i = 0; i < data->vcands.length (); i++)
2995 cand = data->vcands[i];
2997 if (cand->pos != pos)
2998 continue;
3000 if (cand->incremented_at != incremented_at
3001 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3002 && cand->ainc_use != use))
3003 continue;
3005 if (operand_equal_p (base, cand->iv->base, 0)
3006 && operand_equal_p (step, cand->iv->step, 0)
3007 && (TYPE_PRECISION (TREE_TYPE (base))
3008 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3009 break;
3012 if (i == data->vcands.length ())
3014 cand = XCNEW (struct iv_cand);
3015 cand->id = i;
3016 cand->iv = alloc_iv (data, base, step);
3017 cand->pos = pos;
3018 if (pos != IP_ORIGINAL)
3020 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3021 cand->var_after = cand->var_before;
3023 cand->important = important;
3024 cand->incremented_at = incremented_at;
3025 data->vcands.safe_push (cand);
3027 if (TREE_CODE (step) != INTEGER_CST)
3028 find_inv_vars (data, &step, &cand->inv_vars);
3030 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3031 cand->ainc_use = use;
3032 else
3033 cand->ainc_use = NULL;
3035 cand->orig_iv = orig_iv;
3036 if (dump_file && (dump_flags & TDF_DETAILS))
3037 dump_cand (dump_file, cand);
3040 cand->important |= important;
3042 /* Relate candidate to the group for which it is added. */
3043 if (use)
3044 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3046 return cand;
3049 /* Returns true if incrementing the induction variable at the end of the LOOP
3050 is allowed.
3052 The purpose is to avoid splitting latch edge with a biv increment, thus
3053 creating a jump, possibly confusing other optimization passes and leaving
3054 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
3055 is not available (so we do not have a better alternative), or if the latch
3056 edge is already nonempty. */
3058 static bool
3059 allow_ip_end_pos_p (struct loop *loop)
3061 if (!ip_normal_pos (loop))
3062 return true;
3064 if (!empty_block_p (ip_end_pos (loop)))
3065 return true;
3067 return false;
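/* For illustration: if the latch block already contains real statements,
placing the increment there (IP_END) adds no new jump, so it is allowed;
if the latch is empty and the normal position before the exit test
exists, IP_END is rejected. */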
3070 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3071 Important field is set to IMPORTANT. */
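/* For illustration, on a hypothetical target with 4-byte post-increment
loads: for a use *p_7 with step 4, an IP_AFTER_USE candidate with the
use's own base is added, so the access can become a single
auto-incrementing load; with pre-increment support, an IP_BEFORE_USE
candidate is added instead, with its base biased backward by one step. */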
3073 static void
3074 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3075 bool important, struct iv_use *use)
3077 basic_block use_bb = gimple_bb (use->stmt);
3078 machine_mode mem_mode;
3079 unsigned HOST_WIDE_INT cstepi;
3081 /* If we insert the increment in any position other than the standard
3082 ones, we must ensure that it is incremented once per iteration.
3083 It must not be in an inner nested loop, or one side of an if
3084 statement. */
3085 if (use_bb->loop_father != data->current_loop
3086 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3087 || stmt_could_throw_p (use->stmt)
3088 || !cst_and_fits_in_hwi (step))
3089 return;
3091 cstepi = int_cst_value (step);
3093 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3094 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3095 || USE_STORE_PRE_INCREMENT (mem_mode))
3096 && GET_MODE_SIZE (mem_mode) == cstepi)
3097 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3098 || USE_STORE_PRE_DECREMENT (mem_mode))
3099 && GET_MODE_SIZE (mem_mode) == -cstepi))
3101 enum tree_code code = MINUS_EXPR;
3102 tree new_base;
3103 tree new_step = step;
3105 if (POINTER_TYPE_P (TREE_TYPE (base)))
3107 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3108 code = POINTER_PLUS_EXPR;
3110 else
3111 new_step = fold_convert (TREE_TYPE (base), new_step);
3112 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3113 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3114 use->stmt);
3116 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3117 || USE_STORE_POST_INCREMENT (mem_mode))
3118 && GET_MODE_SIZE (mem_mode) == cstepi)
3119 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3120 || USE_STORE_POST_DECREMENT (mem_mode))
3121 && GET_MODE_SIZE (mem_mode) == -cstepi))
3123 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3124 use->stmt);
3128 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3129 position to POS. If USE is not NULL, the candidate is set as related to
3130 it. The candidate computation is scheduled before the exit condition and
3131 at the end of the loop. */
3133 static void
3134 add_candidate (struct ivopts_data *data,
3135 tree base, tree step, bool important, struct iv_use *use,
3136 struct iv *orig_iv = NULL)
3138 if (ip_normal_pos (data->current_loop))
3139 add_candidate_1 (data, base, step, important,
3140 IP_NORMAL, use, NULL, orig_iv);
3141 if (ip_end_pos (data->current_loop)
3142 && allow_ip_end_pos_p (data->current_loop))
3143 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3146 /* Adds standard iv candidates. */
3148 static void
3149 add_standard_iv_candidates (struct ivopts_data *data)
3151 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3153 /* The same for the long type, if it is still fast enough. */
3154 if (TYPE_PRECISION
3155 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3156 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3157 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3158 build_int_cst (long_integer_type_node, 1), true, NULL);
3160 /* The same for the long long type, if it is still fast enough. */
3161 if (TYPE_PRECISION
3162 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3163 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3164 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3165 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3169 /* Adds candidates based on the old induction variable IV. */
3171 static void
3172 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3174 gimple *phi;
3175 tree def;
3176 struct iv_cand *cand;
3178 /* Check if this biv is used in an address type use. */
3179 if (iv->no_overflow && iv->have_address_use
3180 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3181 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3183 tree base = fold_convert (sizetype, iv->base);
3184 tree step = fold_convert (sizetype, iv->step);
3186 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3187 add_candidate (data, base, step, true, NULL, iv);
3188 /* Add iv cand of the original type only if it has nonlinear use. */
3189 if (iv->nonlin_use)
3190 add_candidate (data, iv->base, iv->step, true, NULL);
3192 else
3193 add_candidate (data, iv->base, iv->step, true, NULL);
3195 /* The same, but with initial value zero. */
3196 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3197 add_candidate (data, size_int (0), iv->step, true, NULL);
3198 else
3199 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3200 iv->step, true, NULL);
3202 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3203 if (gimple_code (phi) == GIMPLE_PHI)
3205 /* Additionally record the possibility of leaving the original iv
3206 untouched. */
3207 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3208 /* Don't add candidate if it's from another PHI node because
3209 it's an affine iv appearing in the form of PEELED_CHREC. */
3210 phi = SSA_NAME_DEF_STMT (def);
3211 if (gimple_code (phi) != GIMPLE_PHI)
3213 cand = add_candidate_1 (data,
3214 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3215 SSA_NAME_DEF_STMT (def));
3216 if (cand)
3218 cand->var_before = iv->ssa_name;
3219 cand->var_after = def;
3222 else
3223 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3227 /* Adds candidates based on the old induction variables. */
3229 static void
3230 add_iv_candidate_for_bivs (struct ivopts_data *data)
3232 unsigned i;
3233 struct iv *iv;
3234 bitmap_iterator bi;
3236 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3238 iv = ver_info (data, i)->iv;
3239 if (iv && iv->biv_p && !integer_zerop (iv->step))
3240 add_iv_candidate_for_biv (data, iv);
3244 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3246 static void
3247 record_common_cand (struct ivopts_data *data, tree base,
3248 tree step, struct iv_use *use)
3250 struct iv_common_cand ent;
3251 struct iv_common_cand **slot;
3253 ent.base = base;
3254 ent.step = step;
3255 ent.hash = iterative_hash_expr (base, 0);
3256 ent.hash = iterative_hash_expr (step, ent.hash);
3258 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3259 if (*slot == NULL)
3261 *slot = new iv_common_cand ();
3262 (*slot)->base = base;
3263 (*slot)->step = step;
3264 (*slot)->uses.create (8);
3265 (*slot)->hash = ent.hash;
3266 data->iv_common_cands.safe_push ((*slot));
3269 gcc_assert (use != NULL);
3270 (*slot)->uses.safe_push (use);
3271 return;
3274 /* Comparison function used to sort common candidates. */
3276 static int
3277 common_cand_cmp (const void *p1, const void *p2)
3279 unsigned n1, n2;
3280 const struct iv_common_cand *const *const ccand1
3281 = (const struct iv_common_cand *const *)p1;
3282 const struct iv_common_cand *const *const ccand2
3283 = (const struct iv_common_cand *const *)p2;
3285 n1 = (*ccand1)->uses.length ();
3286 n2 = (*ccand2)->uses.length ();
3287 return n2 - n1;
3290 /* Adds IV candidates based on the common candidates recorded. */
3292 static void
3293 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3295 unsigned i, j;
3296 struct iv_cand *cand_1, *cand_2;
3298 data->iv_common_cands.qsort (common_cand_cmp);
3299 for (i = 0; i < data->iv_common_cands.length (); i++)
3301 struct iv_common_cand *ptr = data->iv_common_cands[i];
3303 /* Only add IV candidate if it's derived from multiple uses. */
3304 if (ptr->uses.length () <= 1)
3305 break;
3307 cand_1 = NULL;
3308 cand_2 = NULL;
3309 if (ip_normal_pos (data->current_loop))
3310 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3311 false, IP_NORMAL, NULL, NULL);
3313 if (ip_end_pos (data->current_loop)
3314 && allow_ip_end_pos_p (data->current_loop))
3315 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3316 false, IP_END, NULL, NULL);
3318 /* Bind deriving uses and the new candidates. */
3319 for (j = 0; j < ptr->uses.length (); j++)
3321 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3322 if (cand_1)
3323 bitmap_set_bit (group->related_cands, cand_1->id);
3324 if (cand_2)
3325 bitmap_set_bit (group->related_cands, cand_2->id);
3329 /* Release data since it is useless from this point. */
3330 data->iv_common_cand_tab->empty ();
3331 data->iv_common_cands.truncate (0);
3334 /* Adds candidates based on the value of USE's iv. */
3336 static void
3337 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3339 unsigned HOST_WIDE_INT offset;
3340 tree base;
3341 tree basetype;
3342 struct iv *iv = use->iv;
3344 add_candidate (data, iv->base, iv->step, false, use);
3346 /* Record common candidate for use in case it can be shared by others. */
3347 record_common_cand (data, iv->base, iv->step, use);
3349 /* Record common candidate with initial value zero. */
3350 basetype = TREE_TYPE (iv->base);
3351 if (POINTER_TYPE_P (basetype))
3352 basetype = sizetype;
3353 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3355 /* Record common candidate with constant offset stripped in base.
3356 Like the use itself, we also add candidate directly for it. */
3357 base = strip_offset (iv->base, &offset);
3358 if (offset || base != iv->base)
3360 record_common_cand (data, base, iv->step, use);
3361 add_candidate (data, base, iv->step, false, use);
3364 /* Record common candidate with base_object removed in base. */
3365 base = iv->base;
3366 STRIP_NOPS (base);
3367 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3369 tree step = iv->step;
3371 STRIP_NOPS (step);
3372 base = TREE_OPERAND (base, 1);
3373 step = fold_convert (sizetype, step);
3374 record_common_cand (data, base, step, use);
3375 /* Also record common candidate with offset stripped. */
3376 base = strip_offset (base, &offset);
3377 if (offset)
3378 record_common_cand (data, base, step, use);
3381 /* Finally, add auto-increment candidates. Make such variables
3382 important, since other iv uses with the same base object may be
3383 based on them. */
3384 if (use != NULL && use->type == USE_ADDRESS)
3385 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3388 /* Adds candidates based on the uses. */
3390 static void
3391 add_iv_candidate_for_groups (struct ivopts_data *data)
3393 unsigned i;
3395 /* Only add candidate for the first use in group. */
3396 for (i = 0; i < data->vgroups.length (); i++)
3398 struct iv_group *group = data->vgroups[i];
3400 gcc_assert (group->vuses[0] != NULL);
3401 add_iv_candidate_for_use (data, group->vuses[0]);
3403 add_iv_candidate_derived_from_uses (data);
3406 /* Record important candidates and add them to related_cands bitmaps. */
3408 static void
3409 record_important_candidates (struct ivopts_data *data)
3411 unsigned i;
3412 struct iv_group *group;
3414 for (i = 0; i < data->vcands.length (); i++)
3416 struct iv_cand *cand = data->vcands[i];
3418 if (cand->important)
3419 bitmap_set_bit (data->important_candidates, i);
3422 data->consider_all_candidates = (data->vcands.length ()
3423 <= CONSIDER_ALL_CANDIDATES_BOUND);
3425 /* Add important candidates to groups' related_cands bitmaps. */
3426 for (i = 0; i < data->vgroups.length (); i++)
3428 group = data->vgroups[i];
3429 bitmap_ior_into (group->related_cands, data->important_candidates);
3433 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3434 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3435 we allocate a simple list for every group. */
3437 static void
3438 alloc_use_cost_map (struct ivopts_data *data)
3440 unsigned i, size, s;
3442 for (i = 0; i < data->vgroups.length (); i++)
3444 struct iv_group *group = data->vgroups[i];
3446 if (data->consider_all_candidates)
3447 size = data->vcands.length ();
3448 else
3450 s = bitmap_count_bits (group->related_cands);
3452 /* Round up to a power of two, so that computing the modulus is fast. */
3453 size = s ? (1 << ceil_log2 (s)) : 1;
3456 group->n_map_members = size;
3457 group->cost_map = XCNEWVEC (struct cost_pair, size);
3461 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3462 depends on invariants INV_VARS, that the value used in expressing it is
3463 VALUE, and that in case of iv elimination the comparison operator is COMP. */
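/* For illustration: when consider_all_candidates is false, cost_map
behaves as an open-addressed hash table. With n_map_members == 8 and
cand->id == 11, probing starts at slot 11 & 7 == 3 and walks forward,
wrapping around once, to the first free slot. */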
3465 static void
3466 set_group_iv_cost (struct ivopts_data *data,
3467 struct iv_group *group, struct iv_cand *cand,
3468 comp_cost cost, bitmap inv_vars, tree value,
3469 enum tree_code comp, bitmap inv_exprs)
3471 unsigned i, s;
3473 if (cost.infinite_cost_p ())
3475 BITMAP_FREE (inv_vars);
3476 BITMAP_FREE (inv_exprs);
3477 return;
3480 if (data->consider_all_candidates)
3482 group->cost_map[cand->id].cand = cand;
3483 group->cost_map[cand->id].cost = cost;
3484 group->cost_map[cand->id].inv_vars = inv_vars;
3485 group->cost_map[cand->id].inv_exprs = inv_exprs;
3486 group->cost_map[cand->id].value = value;
3487 group->cost_map[cand->id].comp = comp;
3488 return;
3491 /* n_map_members is a power of two, so this computes modulo. */
3492 s = cand->id & (group->n_map_members - 1);
3493 for (i = s; i < group->n_map_members; i++)
3494 if (!group->cost_map[i].cand)
3495 goto found;
3496 for (i = 0; i < s; i++)
3497 if (!group->cost_map[i].cand)
3498 goto found;
3500 gcc_unreachable ();
3502 found:
3503 group->cost_map[i].cand = cand;
3504 group->cost_map[i].cost = cost;
3505 group->cost_map[i].inv_vars = inv_vars;
3506 group->cost_map[i].inv_exprs = inv_exprs;
3507 group->cost_map[i].value = value;
3508 group->cost_map[i].comp = comp;
3511 /* Gets cost of (GROUP, CAND) pair. */
3513 static struct cost_pair *
3514 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3515 struct iv_cand *cand)
3517 unsigned i, s;
3518 struct cost_pair *ret;
3520 if (!cand)
3521 return NULL;
3523 if (data->consider_all_candidates)
3525 ret = group->cost_map + cand->id;
3526 if (!ret->cand)
3527 return NULL;
3529 return ret;
3532 /* n_map_members is a power of two, so this computes modulo. */
3533 s = cand->id & (group->n_map_members - 1);
3534 for (i = s; i < group->n_map_members; i++)
3535 if (group->cost_map[i].cand == cand)
3536 return group->cost_map + i;
3537 else if (group->cost_map[i].cand == NULL)
3538 return NULL;
3539 for (i = 0; i < s; i++)
3540 if (group->cost_map[i].cand == cand)
3541 return group->cost_map + i;
3542 else if (group->cost_map[i].cand == NULL)
3543 return NULL;
3545 return NULL;
3548 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3549 static rtx
3550 produce_memory_decl_rtl (tree obj, int *regno)
3552 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3553 machine_mode address_mode = targetm.addr_space.address_mode (as);
3554 rtx x;
3556 gcc_assert (obj);
3557 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3559 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3560 x = gen_rtx_SYMBOL_REF (address_mode, name);
3561 SET_SYMBOL_REF_DECL (x, obj);
3562 x = gen_rtx_MEM (DECL_MODE (obj), x);
3563 set_mem_addr_space (x, as);
3564 targetm.encode_section_info (obj, x, true);
3566 else
3568 x = gen_raw_REG (address_mode, (*regno)++);
3569 x = gen_rtx_MEM (DECL_MODE (obj), x);
3570 set_mem_addr_space (x, as);
3573 return x;
3576 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3577 walk_tree. DATA contains the actual fake register number. */
3579 static tree
3580 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3582 tree obj = NULL_TREE;
3583 rtx x = NULL_RTX;
3584 int *regno = (int *) data;
3586 switch (TREE_CODE (*expr_p))
3588 case ADDR_EXPR:
3589 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3590 handled_component_p (*expr_p);
3591 expr_p = &TREE_OPERAND (*expr_p, 0))
3592 continue;
3593 obj = *expr_p;
3594 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3595 x = produce_memory_decl_rtl (obj, regno);
3596 break;
3598 case SSA_NAME:
3599 *ws = 0;
3600 obj = SSA_NAME_VAR (*expr_p);
3601 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3602 if (!obj)
3603 return NULL_TREE;
3604 if (!DECL_RTL_SET_P (obj))
3605 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3606 break;
3608 case VAR_DECL:
3609 case PARM_DECL:
3610 case RESULT_DECL:
3611 *ws = 0;
3612 obj = *expr_p;
3614 if (DECL_RTL_SET_P (obj))
3615 break;
3617 if (DECL_MODE (obj) == BLKmode)
3618 x = produce_memory_decl_rtl (obj, regno);
3619 else
3620 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3622 break;
3624 default:
3625 break;
3628 if (x)
3630 decl_rtl_to_reset.safe_push (obj);
3631 SET_DECL_RTL (obj, x);
3634 return NULL_TREE;
3637 /* Determines cost of the computation of EXPR. */
3639 static unsigned
3640 computation_cost (tree expr, bool speed)
3642 rtx_insn *seq;
3643 rtx rslt;
3644 tree type = TREE_TYPE (expr);
3645 unsigned cost;
3646 /* Avoid using hard regs in ways which may be unsupported. */
3647 int regno = LAST_VIRTUAL_REGISTER + 1;
3648 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3649 enum node_frequency real_frequency = node->frequency;
3651 node->frequency = NODE_FREQUENCY_NORMAL;
3652 crtl->maybe_hot_insn_p = speed;
3653 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3654 start_sequence ();
3655 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3656 seq = get_insns ();
3657 end_sequence ();
3658 default_rtl_profile ();
3659 node->frequency = real_frequency;
3661 cost = seq_cost (seq, speed);
3662 if (MEM_P (rslt))
3663 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3664 TYPE_ADDR_SPACE (type), speed);
3665 else if (!REG_P (rslt))
3666 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3668 return cost;
3671 /* Returns variable containing the value of candidate CAND at statement AT. */
3673 static tree
3674 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3676 if (stmt_after_increment (loop, cand, stmt))
3677 return cand->var_after;
3678 else
3679 return cand->var_before;
3682 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3683 same precision that is at least as wide as the precision of TYPE, stores
3684 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3685 type of A and B. */
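/* For illustration: if *A is (unsigned int) x_1 and *B is
(unsigned int) y_2, with x_1 and y_2 of a common 64-bit type, *A and *B
are replaced by x_1 and y_2 and the 64-bit type is returned, so the
subtraction below can be folded in the wider type first. */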
3687 static tree
3688 determine_common_wider_type (tree *a, tree *b)
3690 tree wider_type = NULL;
3691 tree suba, subb;
3692 tree atype = TREE_TYPE (*a);
3694 if (CONVERT_EXPR_P (*a))
3696 suba = TREE_OPERAND (*a, 0);
3697 wider_type = TREE_TYPE (suba);
3698 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3699 return atype;
3701 else
3702 return atype;
3704 if (CONVERT_EXPR_P (*b))
3706 subb = TREE_OPERAND (*b, 0);
3707 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3708 return atype;
3710 else
3711 return atype;
3713 *a = suba;
3714 *b = subb;
3715 return wider_type;
3718 /* Determines the expression by which USE is expressed from induction
3719 variable CAND at statement AT in LOOP. The expression is stored in a
3720 decomposed form, in two parts: the invariant part in AFF_INV and the
3721 variant part in AFF_VAR. Store the ratio of USE.step over CAND.step in
3722 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
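/* A worked example: with USE.iv = {base 4, step 8} and CAND.iv =
{base 0, step 4}, constant_multiple_of gives ratio 2, and the
decomposition is use = (4 - 2*0) + 2*var, i.e. AFF_INV = 4 and
AFF_VAR = 2*var, where var holds the candidate's value at AT. */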
3724 static bool
3725 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3726 struct iv_cand *cand, struct aff_tree *aff_inv,
3727 struct aff_tree *aff_var, widest_int *prat = NULL)
3729 tree ubase = use->iv->base, ustep = use->iv->step;
3730 tree cbase = cand->iv->base, cstep = cand->iv->step;
3731 tree common_type, uutype, var, cstep_common;
3732 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3733 aff_tree aff_cbase;
3734 widest_int rat;
3736 /* We must have enough precision to express the values of USE. */
3737 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3738 return false;
3740 var = var_at_stmt (loop, cand, at);
3741 uutype = unsigned_type_for (utype);
3743 /* If the conversion is not noop, perform it. */
3744 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3746 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3747 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3749 tree inner_base, inner_step, inner_type;
3750 inner_base = TREE_OPERAND (cbase, 0);
3751 if (CONVERT_EXPR_P (cstep))
3752 inner_step = TREE_OPERAND (cstep, 0);
3753 else
3754 inner_step = cstep;
3756 inner_type = TREE_TYPE (inner_base);
3757 /* If the candidate is added from a biv whose type is smaller than
3758 ctype, we know both the candidate and the biv won't overflow.
3759 In this case, it's safe to skip the conversion in the candidate.
3760 As an example, (unsigned short)((unsigned long)A) equals
3761 (unsigned short)A, if A has a type no larger than short. */
3762 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3764 cbase = inner_base;
3765 cstep = inner_step;
3768 cbase = fold_convert (uutype, cbase);
3769 cstep = fold_convert (uutype, cstep);
3770 var = fold_convert (uutype, var);
3773 /* Ratio is 1 when computing the value of biv cand by itself.
3774 We can't rely on constant_multiple_of in this case because the
3775 use is created after the original biv is selected. The call
3776 could fail because of inconsistent fold behavior. See PR68021
3777 for more information. */
3778 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3780 gcc_assert (is_gimple_assign (use->stmt));
3781 gcc_assert (use->iv->ssa_name == cand->var_after);
3782 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3783 rat = 1;
3785 else if (!constant_multiple_of (ustep, cstep, &rat))
3786 return false;
3788 if (prat)
3789 *prat = rat;
3791 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3792 type, we achieve better folding by computing their difference in this
3793 wider type, and casting the result to UUTYPE. We do not need to worry
3794 about overflows, as all the arithmetic will in the end be performed in
3795 UUTYPE anyway. */
3796 common_type = determine_common_wider_type (&ubase, &cbase);
3798 /* use = ubase - ratio * cbase + ratio * var. */
3799 tree_to_aff_combination (ubase, common_type, aff_inv);
3800 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3801 tree_to_aff_combination (var, uutype, aff_var);
3803 /* We need to shift the value if we are after the increment. */
3804 if (stmt_after_increment (loop, cand, at))
3806 aff_tree cstep_aff;
3808 if (common_type != uutype)
3809 cstep_common = fold_convert (common_type, cstep);
3810 else
3811 cstep_common = cstep;
3813 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3814 aff_combination_add (&aff_cbase, &cstep_aff);
3817 aff_combination_scale (&aff_cbase, -rat);
3818 aff_combination_add (aff_inv, &aff_cbase);
3819 if (common_type != uutype)
3820 aff_combination_convert (aff_inv, uutype);
3822 aff_combination_scale (aff_var, rat);
3823 return true;
3826 /* Determines the expression by which USE is expressed from induction variable
3827 CAND at statement AT in LOOP. The expression is stored in a decomposed
3828 form into AFF. Returns false if USE cannot be expressed using CAND. */
3830 static bool
3831 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3832 struct iv_cand *cand, struct aff_tree *aff)
3834 aff_tree aff_var;
3836 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3837 return false;
3839 aff_combination_add (aff, &aff_var);
3840 return true;
3843 /* Return the type of USE. */
3845 static tree
3846 get_use_type (struct iv_use *use)
3848 tree base_type = TREE_TYPE (use->iv->base);
3849 tree type;
3851 if (use->type == USE_ADDRESS)
3853 /* The base_type may be a void pointer. Create a pointer type based on
3854 the mem_ref instead. */
3855 type = build_pointer_type (TREE_TYPE (*use->op_p));
3856 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3857 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3859 else
3860 type = base_type;
3862 return type;
3865 /* Determines the expression by which USE is expressed from induction variable
3866 CAND at statement AT in LOOP. The computation is unshared. */
3868 static tree
3869 get_computation_at (struct loop *loop, gimple *at,
3870 struct iv_use *use, struct iv_cand *cand)
3872 aff_tree aff;
3873 tree type = get_use_type (use);
3875 if (!get_computation_aff (loop, at, use, cand, &aff))
3876 return NULL_TREE;
3877 unshare_aff_combination (&aff);
3878 return fold_convert (type, aff_combination_to_tree (&aff));
3881 /* Adjust the cost COST for being in loop setup rather than loop body.
3882 If we're optimizing for space, the loop setup overhead is constant;
3883 if we're optimizing for speed, amortize it over the average number of
3884 iterations. If ROUND_UP_P is true, the result is rounded up rather
3885 than truncated toward zero when optimizing for speed. */
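/* For illustration: with COST == 10 and an average of 4 iterations,
optimizing for speed yields 10 / 4 == 2, or (10 + 3) / 4 == 3 when
ROUND_UP_P; optimizing for size returns 10 unchanged. */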
3886 static unsigned
3887 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3888 bool round_up_p = false)
3890 if (cost == INFTY)
3891 return cost;
3892 else if (optimize_loop_for_speed_p (data->current_loop))
3894 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3895 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3897 else
3898 return cost;
3901 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3902 validity for a memory reference accessing memory of mode MODE in
3903 address space AS. */
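/* For illustration: an x86-like target typically validates ratios 1, 2,
4 and 8 in scaled-index addresses, while a simple RISC target may accept
only 1. The probe below builds "reg1 * ratio + reg2" and asks the
backend once per ratio, caching the answers in a bitmap per
(address space, mode) pair. */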
3906 bool
3907 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3908 addr_space_t as)
3910 #define MAX_RATIO 128
3911 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3912 static vec<sbitmap> valid_mult_list;
3913 sbitmap valid_mult;
3915 if (data_index >= valid_mult_list.length ())
3916 valid_mult_list.safe_grow_cleared (data_index + 1);
3918 valid_mult = valid_mult_list[data_index];
3919 if (!valid_mult)
3921 machine_mode address_mode = targetm.addr_space.address_mode (as);
3922 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3923 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3924 rtx addr, scaled;
3925 HOST_WIDE_INT i;
3927 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3928 bitmap_clear (valid_mult);
3929 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3930 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3931 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3933 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3934 if (memory_address_addr_space_p (mode, addr, as)
3935 || memory_address_addr_space_p (mode, scaled, as))
3936 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3939 if (dump_file && (dump_flags & TDF_DETAILS))
3941 fprintf (dump_file, " allowed multipliers:");
3942 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3943 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3944 fprintf (dump_file, " %d", (int) i);
3945 fprintf (dump_file, "\n");
3946 fprintf (dump_file, "\n");
3949 valid_mult_list[data_index] = valid_mult;
3952 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3953 return false;
3955 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3958 /* Calculate the speed or size cost (per SPEED) of shiftadd EXPR in MODE.
3959 MULT is the EXPR operand holding the multiplication by a power of two,
3960 i.e. the shift. COST0 and COST1 are the costs for calculating the
3961 operands of EXPR. Returns true if successful, and returns the cost in COST. */
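/* For illustration: for EXPR = a_1 + b_2 * 4, MULT is b_2 * 4 and
m == 2; the result is the cheaper of an explicit shift-plus-add sequence
and a single shift-and-add instruction (an "lea"-style pattern, say),
plus the cost of computing the non-multiplied operand. */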
3963 static bool
3964 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3965 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3967 comp_cost res;
3968 tree op1 = TREE_OPERAND (expr, 1);
3969 tree cst = TREE_OPERAND (mult, 1);
3970 tree multop = TREE_OPERAND (mult, 0);
3971 int m = exact_log2 (int_cst_value (cst));
3972 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3973 int as_cost, sa_cost;
3974 bool mult_in_op1;
3976 if (!(m >= 0 && m < maxm))
3977 return false;
3979 STRIP_NOPS (op1);
3980 mult_in_op1 = operand_equal_p (op1, mult, 0);
3982 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3984 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3985 use that in preference to a shift insn followed by an add insn. */
3986 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3987 ? shiftadd_cost (speed, mode, m)
3988 : (mult_in_op1
3989 ? shiftsub1_cost (speed, mode, m)
3990 : shiftsub0_cost (speed, mode, m)));
3992 res = comp_cost (MIN (as_cost, sa_cost), 0);
3993 res += (mult_in_op1 ? cost0 : cost1);
3995 STRIP_NOPS (multop);
3996 if (!is_gimple_val (multop))
3997 res += force_expr_to_var_cost (multop, speed);
3999 *cost = res;
4000 return true;
4003 /* Estimates cost of forcing expression EXPR into a variable. */
4005 static comp_cost
4006 force_expr_to_var_cost (tree expr, bool speed)
4008 static bool costs_initialized = false;
4009 static unsigned integer_cost [2];
4010 static unsigned symbol_cost [2];
4011 static unsigned address_cost [2];
4012 tree op0, op1;
4013 comp_cost cost0, cost1, cost;
4014 machine_mode mode;
4016 if (!costs_initialized)
4018 tree type = build_pointer_type (integer_type_node);
4019 tree var, addr;
4020 rtx x;
4021 int i;
4023 var = create_tmp_var_raw (integer_type_node, "test_var");
4024 TREE_STATIC (var) = 1;
4025 x = produce_memory_decl_rtl (var, NULL);
4026 SET_DECL_RTL (var, x);
4028 addr = build1 (ADDR_EXPR, type, var);
4031 for (i = 0; i < 2; i++)
4033 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4034 2000), i);
4036 symbol_cost[i] = computation_cost (addr, i) + 1;
4038 address_cost[i]
4039 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4040 if (dump_file && (dump_flags & TDF_DETAILS))
4042 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4043 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4044 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4045 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4046 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4047 fprintf (dump_file, "\n");
4051 costs_initialized = true;
4054 STRIP_NOPS (expr);
4056 if (SSA_VAR_P (expr))
4057 return no_cost;
4059 if (is_gimple_min_invariant (expr))
4061 if (TREE_CODE (expr) == INTEGER_CST)
4062 return comp_cost (integer_cost [speed], 0);
4064 if (TREE_CODE (expr) == ADDR_EXPR)
4066 tree obj = TREE_OPERAND (expr, 0);
4068 if (VAR_P (obj)
4069 || TREE_CODE (obj) == PARM_DECL
4070 || TREE_CODE (obj) == RESULT_DECL)
4071 return comp_cost (symbol_cost [speed], 0);
4074 return comp_cost (address_cost [speed], 0);
4077 switch (TREE_CODE (expr))
4079 case POINTER_PLUS_EXPR:
4080 case PLUS_EXPR:
4081 case MINUS_EXPR:
4082 case MULT_EXPR:
4083 op0 = TREE_OPERAND (expr, 0);
4084 op1 = TREE_OPERAND (expr, 1);
4085 STRIP_NOPS (op0);
4086 STRIP_NOPS (op1);
4087 break;
4089 CASE_CONVERT:
4090 case NEGATE_EXPR:
4091 op0 = TREE_OPERAND (expr, 0);
4092 STRIP_NOPS (op0);
4093 op1 = NULL_TREE;
4094 break;
4096 default:
4097 /* Just an arbitrary value, FIXME. */
4098 return comp_cost (target_spill_cost[speed], 0);
4101 if (op0 == NULL_TREE
4102 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4103 cost0 = no_cost;
4104 else
4105 cost0 = force_expr_to_var_cost (op0, speed);
4107 if (op1 == NULL_TREE
4108 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4109 cost1 = no_cost;
4110 else
4111 cost1 = force_expr_to_var_cost (op1, speed);
4113 mode = TYPE_MODE (TREE_TYPE (expr));
4114 switch (TREE_CODE (expr))
4116 case POINTER_PLUS_EXPR:
4117 case PLUS_EXPR:
4118 case MINUS_EXPR:
4119 case NEGATE_EXPR:
4120 cost = comp_cost (add_cost (speed, mode), 0);
4121 if (TREE_CODE (expr) != NEGATE_EXPR)
4123 tree mult = NULL_TREE;
4124 comp_cost sa_cost;
4125 if (TREE_CODE (op1) == MULT_EXPR)
4126 mult = op1;
4127 else if (TREE_CODE (op0) == MULT_EXPR)
4128 mult = op0;
4130 if (mult != NULL_TREE
4131 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4132 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4133 speed, &sa_cost))
4134 return sa_cost;
4136 break;
4138 CASE_CONVERT:
4140 tree inner_mode, outer_mode;
4141 outer_mode = TREE_TYPE (expr);
4142 inner_mode = TREE_TYPE (op0);
4143 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4144 TYPE_MODE (inner_mode), speed), 0);
4146 break;
4148 case MULT_EXPR:
4149 if (cst_and_fits_in_hwi (op0))
4150 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4151 mode, speed), 0);
4152 else if (cst_and_fits_in_hwi (op1))
4153 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4154 mode, speed), 0);
4155 else
4156 return comp_cost (target_spill_cost [speed], 0);
4157 break;
4159 default:
4160 gcc_unreachable ();
4163 cost += cost0;
4164 cost += cost1;
4165 return cost;
4168 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4169 invariants the computation depends on. */
4171 static comp_cost
4172 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4174 if (!expr)
4175 return no_cost;
4177 find_inv_vars (data, &expr, inv_vars);
4178 return force_expr_to_var_cost (expr, data->speed);
4181 /* Returns the cost of an auto-modifying address expression of the shape
4182 base + offset. AINC_STEP is the step size of the address IV.
4183 AINC_OFFSET is the offset of the address expression, which has
4184 ADDR_MODE in address space AS. The memory access has MEM_MODE.
4185 SPEED indicates whether we optimize for speed or size. */
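/* For example, on a target with post-increment addressing, a load of a
   2-byte value through "*p++" has a mem_mode of e.g. HImode, AINC_STEP 2
   and AINC_OFFSET 0, which matches the AINC_POST_INC case below; whether
   that form is actually cheap remains target-specific.  */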
4187 enum ainc_type
4189 AINC_PRE_INC, /* Pre increment. */
4190 AINC_PRE_DEC, /* Pre decrement. */
4191 AINC_POST_INC, /* Post increment. */
4192 AINC_POST_DEC, /* Post decrement. */
4193 AINC_NONE /* Also the number of auto increment types. */
4196 struct ainc_cost_data
4198 unsigned costs[AINC_NONE];
4201 static comp_cost
4202 get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
4203 machine_mode addr_mode, machine_mode mem_mode,
4204 addr_space_t as, bool speed)
4206 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4207 && !USE_STORE_PRE_DECREMENT (mem_mode)
4208 && !USE_LOAD_POST_DECREMENT (mem_mode)
4209 && !USE_STORE_POST_DECREMENT (mem_mode)
4210 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4211 && !USE_STORE_PRE_INCREMENT (mem_mode)
4212 && !USE_LOAD_POST_INCREMENT (mem_mode)
4213 && !USE_STORE_POST_INCREMENT (mem_mode))
4214 return infinite_cost;
4216 static vec<ainc_cost_data *> ainc_cost_data_list;
4217 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4218 if (idx >= ainc_cost_data_list.length ())
4220 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4222 gcc_assert (nsize > idx);
4223 ainc_cost_data_list.safe_grow_cleared (nsize);
4226 ainc_cost_data *data = ainc_cost_data_list[idx];
4227 if (data == NULL)
4229 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4231 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4232 data->costs[AINC_PRE_DEC] = INFTY;
4233 data->costs[AINC_POST_DEC] = INFTY;
4234 data->costs[AINC_PRE_INC] = INFTY;
4235 data->costs[AINC_POST_INC] = INFTY;
4236 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4237 || USE_STORE_PRE_DECREMENT (mem_mode))
4239 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4241 if (memory_address_addr_space_p (mem_mode, addr, as))
4242 data->costs[AINC_PRE_DEC]
4243 = address_cost (addr, mem_mode, as, speed);
4245 if (USE_LOAD_POST_DECREMENT (mem_mode)
4246 || USE_STORE_POST_DECREMENT (mem_mode))
4248 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4250 if (memory_address_addr_space_p (mem_mode, addr, as))
4251 data->costs[AINC_POST_DEC]
4252 = address_cost (addr, mem_mode, as, speed);
4254 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4255 || USE_STORE_PRE_INCREMENT (mem_mode))
4257 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4259 if (memory_address_addr_space_p (mem_mode, addr, as))
4260 data->costs[AINC_PRE_INC]
4261 = address_cost (addr, mem_mode, as, speed);
4263 if (USE_LOAD_POST_INCREMENT (mem_mode)
4264 || USE_STORE_POST_INCREMENT (mem_mode))
4266 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4268 if (memory_address_addr_space_p (mem_mode, addr, as))
4269 data->costs[AINC_POST_INC]
4270 = address_cost (addr, mem_mode, as, speed);
4272 ainc_cost_data_list[idx] = data;
4275 HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4276 if (ainc_offset == 0 && msize == ainc_step)
4277 return comp_cost (data->costs[AINC_POST_INC], 0);
4278 if (ainc_offset == 0 && msize == -ainc_step)
4279 return comp_cost (data->costs[AINC_POST_DEC], 0);
4280 if (ainc_offset == msize && msize == ainc_step)
4281 return comp_cost (data->costs[AINC_PRE_INC], 0);
4282 if (ainc_offset == -msize && msize == -ainc_step)
4283 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4285 return infinite_cost;
4288 /* Return the cost of computing USE's address expression by using CAND.
4289 AFF_INV and AFF_VAR represent the invariant and variant parts of
4290 the address expression, respectively. If AFF_INV is simple, store
4291 the loop invariant variables on which it depends in INV_VARS; if
4292 AFF_INV is complicated, handle it as a new invariant expression
4293 and record it in INV_EXPR. RATIO is the multiple between the
4294 steps of USE and CAND. If CAN_AUTOINC is nonnull, store a boolean
4295 value in it indicating whether this is an auto-increment address. */
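/* As an illustration, for a use like "arr[i] + off" with ARR a global
   array, the parts may come out as symbol "arr", index I, step equal to
   RATIO and constant offset OFF, i.e. the addressing mode
   "symbol + index << scale + offset", provided valid_mem_ref_p accepts
   that shape for the target; any part the target cannot encode is moved
   back into the invariant and costed separately below.  */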
4297 static comp_cost
4298 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4299 struct iv_cand *cand, aff_tree *aff_inv,
4300 aff_tree *aff_var, HOST_WIDE_INT ratio,
4301 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4302 bool *can_autoinc, bool speed)
4304 rtx addr;
4305 bool simple_inv = true;
4306 tree comp_inv = NULL_TREE, type = aff_var->type;
4307 comp_cost var_cost = no_cost, cost = no_cost;
4308 struct mem_address parts = {NULL_TREE, integer_one_node,
4309 NULL_TREE, NULL_TREE, NULL_TREE};
4310 machine_mode addr_mode = TYPE_MODE (type);
4311 machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4312 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4314 if (!aff_combination_const_p (aff_inv))
4316 parts.index = integer_one_node;
4317 /* Addressing mode "base + index". */
4318 if (valid_mem_ref_p (mem_mode, as, &parts))
4320 parts.step = wide_int_to_tree (type, ratio);
4321 /* Addressing mode "base + index << scale". */
4322 if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
4323 parts.step = NULL_TREE;
4325 if (aff_inv->offset != 0)
4327 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4328 /* Addressing mode "base + index [<< scale] + offset". */
4329 if (!valid_mem_ref_p (mem_mode, as, &parts))
4330 parts.offset = NULL_TREE;
4331 else
4332 aff_inv->offset = 0;
4335 move_fixed_address_to_symbol (&parts, aff_inv);
4336 /* The base is a fixed address and is moved to the symbol part. */
4337 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4338 parts.base = NULL_TREE;
4340 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4341 if (parts.symbol != NULL_TREE
4342 && !valid_mem_ref_p (mem_mode, as, &parts))
4344 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4345 parts.symbol = NULL_TREE;
4346 /* Reset SIMPLE_INV since symbol address needs to be computed
4347 outside of address expression in this case. */
4348 simple_inv = false;
4350 /* The symbol part is moved back to the base part; it can't be NULL. */
4350 parts.base = integer_one_node;
4353 else
4354 parts.index = NULL_TREE;
4356 else
4358 if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
4360 HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
4361 HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();
4363 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4364 ainc_offset += ainc_step;
4365 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4366 addr_mode, mem_mode, as, speed);
4367 if (!cost.infinite_cost_p ())
4369 *can_autoinc = true;
4370 return cost;
4372 cost = no_cost;
4374 if (!aff_combination_zero_p (aff_inv))
4376 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4377 /* Addressing mode "base + offset". */
4378 if (!valid_mem_ref_p (mem_mode, as, &parts))
4379 parts.offset = NULL_TREE;
4380 else
4381 aff_inv->offset = 0;
4385 if (simple_inv)
4386 simple_inv = (aff_inv == NULL
4387 || aff_combination_const_p (aff_inv)
4388 || aff_combination_singleton_var_p (aff_inv));
4389 if (!aff_combination_zero_p (aff_inv))
4390 comp_inv = aff_combination_to_tree (aff_inv);
4391 if (comp_inv != NULL_TREE)
4392 cost = force_var_cost (data, comp_inv, inv_vars);
4393 if (ratio != 1 && parts.step == NULL_TREE)
4394 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4395 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4396 var_cost += add_cost (speed, addr_mode);
4398 if (comp_inv && inv_expr && !simple_inv)
4400 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4401 /* Clear INV_VARS; the dependence is now recorded in INV_EXPR. */
4402 if (*inv_expr != NULL && inv_vars && *inv_vars)
4403 bitmap_clear (*inv_vars);
4405 /* The cost of a small invariant expression adjusted against loop
4406 niters is usually zero, which makes it hard to differentiate from
4407 a candidate based on loop invariant variables. Secondly, the
4408 generated invariant expression may not be hoisted out of the loop
4409 by a following pass. We penalize the cost by rounding up in order
4410 to neutralize such effects. */
4411 cost.cost = adjust_setup_cost (data, cost.cost, true);
4412 cost.scratch = cost.cost;
4415 cost += var_cost;
4416 addr = addr_for_mem_ref (&parts, as, false);
4417 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4418 cost += address_cost (addr, mem_mode, as, speed);
4420 if (parts.symbol != NULL_TREE)
4421 cost.complexity += 1;
4422 if (parts.step != NULL_TREE && !integer_onep (parts.step))
4423 cost.complexity += 1;
4424 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4425 cost.complexity += 1;
4426 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4427 cost.complexity += 1;
4429 return cost;
4432 /* Scale (multiply) the computed COST (except the scratch part, which is
4433 hoisted out of the loop) by AT->frequency / header->frequency, which
4434 makes the expected cost more accurate. */
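/* For instance, with cost 12, scratch 4, bb frequency 5 and loop header
   frequency 10, the scaled cost is 4 + (12 - 4) * 5 / 10 = 8: the
   scratch (setup) part is paid once no matter how often AT executes.  */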
4436 static comp_cost
4437 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4439 int loop_freq = data->current_loop->header->frequency;
4440 int bb_freq = gimple_bb (at)->frequency;
4441 if (loop_freq != 0)
4443 gcc_assert (cost.scratch <= cost.cost);
4444 int scaled_cost
4445 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4447 if (dump_file && (dump_flags & TDF_DETAILS))
4448 fprintf (dump_file, "Scaling cost based on bb prob "
4449 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4450 1.0f * bb_freq / loop_freq, cost.cost,
4451 cost.scratch, scaled_cost, bb_freq, loop_freq);
4453 cost.cost = scaled_cost;
4456 return cost;
4459 /* Determines the cost of the computation by that USE is expressed
4460 from induction variable CAND. If ADDRESS_P is true, we just need
4461 to create an address from it, otherwise we want to get it into a
4462 register. A set of invariants we depend on is stored in INV_VARS.
4463 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4464 addressing is likely. If INV_EXPR is nonnull, record invariant
4465 expr entry in it. */
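/* For instance, a use with base B + N and step S can be expressed by a
   candidate with base B and step S at ratio 1 with invariant part N:
   the invariant is costed via force_var_cost, one addition is charged
   for combining the parts, and a constant invariant would instead be
   folded into that addition for free.  */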
4467 static comp_cost
4468 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4469 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4470 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4472 gimple *at = use->stmt;
4473 tree ubase = use->iv->base, cbase = cand->iv->base;
4474 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4475 tree comp_inv = NULL_TREE;
4476 HOST_WIDE_INT ratio, aratio;
4477 comp_cost cost;
4478 widest_int rat;
4479 aff_tree aff_inv, aff_var;
4480 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4482 if (inv_vars)
4483 *inv_vars = NULL;
4484 if (can_autoinc)
4485 *can_autoinc = false;
4486 if (inv_expr)
4487 *inv_expr = NULL;
4489 /* Check if we have enough precision to express the values of use. */
4490 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4491 return infinite_cost;
4493 if (address_p
4494 || (use->iv->base_object
4495 && cand->iv->base_object
4496 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4497 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4499 /* Do not try to express address of an object with computation based
4500 on the address of a different object. This may cause problems in
4501 rtl-level alias analysis (which does not expect this to happen,
4502 as it is illegal in C), and would be unlikely to be useful
4503 anyway. */
4504 if (use->iv->base_object
4505 && cand->iv->base_object
4506 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4507 return infinite_cost;
4510 if (!get_computation_aff_1 (data->current_loop, at, use,
4511 cand, &aff_inv, &aff_var, &rat)
4512 || !wi::fits_shwi_p (rat))
4513 return infinite_cost;
4515 ratio = rat.to_shwi ();
4516 if (address_p)
4518 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4519 inv_vars, inv_expr, can_autoinc, speed);
4520 return get_scaled_computation_cost_at (data, at, cost);
4523 bool simple_inv = (aff_combination_const_p (&aff_inv)
4524 || aff_combination_singleton_var_p (&aff_inv));
4525 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4526 aff_combination_convert (&aff_inv, signed_type);
4527 if (!aff_combination_zero_p (&aff_inv))
4528 comp_inv = aff_combination_to_tree (&aff_inv);
4530 cost = force_var_cost (data, comp_inv, inv_vars);
4531 if (comp_inv && inv_expr && !simple_inv)
4533 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4534 /* Clear INV_VARS; the dependence is now recorded in INV_EXPR. */
4535 if (*inv_expr != NULL && inv_vars && *inv_vars)
4536 bitmap_clear (*inv_vars);
4538 cost.cost = adjust_setup_cost (data, cost.cost);
4539 /* Record setup cost in scratch field. */
4540 cost.scratch = cost.cost;
4542 /* The cost of a constant integer can be covered when adding the
4543 invariant part to the variant part. */
4544 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4545 cost = no_cost;
4547 /* Need type narrowing to represent use with cand. */
4548 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4550 machine_mode outer_mode = TYPE_MODE (utype);
4551 machine_mode inner_mode = TYPE_MODE (ctype);
4552 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4555 /* Turn a + i * (-c) into a - i * c. */
4556 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4557 aratio = -ratio;
4558 else
4559 aratio = ratio;
4561 if (ratio != 1)
4562 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4564 /* TODO: We may also need to check if we can compute a + i * 4 in one
4565 instruction. */
4566 /* Need to add up the invariant and variant parts. */
4567 if (comp_inv && !integer_zerop (comp_inv))
4568 cost += add_cost (speed, TYPE_MODE (utype));
4570 return get_scaled_computation_cost_at (data, at, cost);
4573 /* Determines cost of computing the use in GROUP with CAND in a generic
4574 expression. */
4576 static bool
4577 determine_group_iv_cost_generic (struct ivopts_data *data,
4578 struct iv_group *group, struct iv_cand *cand)
4580 comp_cost cost;
4581 iv_inv_expr_ent *inv_expr = NULL;
4582 bitmap inv_vars = NULL, inv_exprs = NULL;
4583 struct iv_use *use = group->vuses[0];
4585 /* The simple case first -- if we need to express value of the preserved
4586 original biv, the cost is 0. This also prevents us from counting the
4587 cost of increment twice -- once at this use and once in the cost of
4588 the candidate. */
4589 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4590 cost = no_cost;
4591 else
4592 cost = get_computation_cost (data, use, cand, false,
4593 &inv_vars, NULL, &inv_expr);
4595 if (inv_expr)
4597 inv_exprs = BITMAP_ALLOC (NULL);
4598 bitmap_set_bit (inv_exprs, inv_expr->id);
4600 set_group_iv_cost (data, group, cand, cost, inv_vars,
4601 NULL_TREE, ERROR_MARK, inv_exprs);
4602 return !cost.infinite_cost_p ();
4605 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4607 static bool
4608 determine_group_iv_cost_address (struct ivopts_data *data,
4609 struct iv_group *group, struct iv_cand *cand)
4611 unsigned i;
4612 bitmap inv_vars = NULL, inv_exprs = NULL;
4613 bool can_autoinc;
4614 iv_inv_expr_ent *inv_expr = NULL;
4615 struct iv_use *use = group->vuses[0];
4616 comp_cost sum_cost = no_cost, cost;
4618 cost = get_computation_cost (data, use, cand, true,
4619 &inv_vars, &can_autoinc, &inv_expr);
4621 if (inv_expr)
4623 inv_exprs = BITMAP_ALLOC (NULL);
4624 bitmap_set_bit (inv_exprs, inv_expr->id);
4626 sum_cost = cost;
4627 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4629 if (can_autoinc)
4630 sum_cost -= cand->cost_step;
4631 /* If we generated the candidate solely for exploiting autoincrement
4632 opportunities, and it turns out it can't be used, set the cost to
4633 infinity to make sure we ignore it. */
4634 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4635 sum_cost = infinite_cost;
4638 /* Uses in a group can share setup code, so only add setup cost once. */
4639 cost -= cost.scratch;
4640 /* Compute and add costs for the rest of the uses in this group. */
4641 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4643 struct iv_use *next = group->vuses[i];
4645 /* TODO: We could skip computing cost for sub iv_use when it has the
4646 same cost as the first iv_use, but the cost really depends on the
4647 offset and where the iv_use is. */
4648 cost = get_computation_cost (data, next, cand, true,
4649 NULL, &can_autoinc, &inv_expr);
4650 if (inv_expr)
4652 if (!inv_exprs)
4653 inv_exprs = BITMAP_ALLOC (NULL);
4655 bitmap_set_bit (inv_exprs, inv_expr->id);
4657 sum_cost += cost;
4659 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4660 NULL_TREE, ERROR_MARK, inv_exprs);
4662 return !sum_cost.infinite_cost_p ();
4665 /* Computes value of candidate CAND at position AT in iteration NITER, and
4666 stores it to VAL. */
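/* That is, VAL is BASE + NITER * STEP, computed in an unsigned type to
   avoid undefined overflow, plus one extra STEP if AT is located after
   the increment of CAND.  */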
4668 static void
4669 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4670 aff_tree *val)
4672 aff_tree step, delta, nit;
4673 struct iv *iv = cand->iv;
4674 tree type = TREE_TYPE (iv->base);
4675 tree steptype;
4676 if (POINTER_TYPE_P (type))
4677 steptype = sizetype;
4678 else
4679 steptype = unsigned_type_for (type);
4681 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4682 aff_combination_convert (&step, steptype);
4683 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4684 aff_combination_convert (&nit, steptype);
4685 aff_combination_mult (&nit, &step, &delta);
4686 if (stmt_after_increment (loop, cand, at))
4687 aff_combination_add (&delta, &step);
4689 tree_to_aff_combination (iv->base, type, val);
4690 if (!POINTER_TYPE_P (type))
4691 aff_combination_convert (val, steptype);
4692 aff_combination_add (val, &delta);
4695 /* Returns the period of induction variable IV. */
4697 static tree
4698 iv_period (struct iv *iv)
4700 tree step = iv->step, period, type;
4701 tree pow2div;
4703 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4705 type = unsigned_type_for (TREE_TYPE (step));
4706 /* The period of the iv is lcm (step, type_range)/step - 1,
4707 i.e., N * type_range / step - 1. Since the type range is a power
4708 of two, N == step >> num_of_ending_zeros_binary (step),
4709 so the final result is
4711 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
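/* E.g. for a 32-bit unsigned IV with step 12 (two trailing zero bits),
   the period is (1 << 30) - 1: the IV takes 1 << 30 distinct values
   before it starts repeating.  */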
4714 pow2div = num_ending_zeros (step);
4716 period = build_low_bits_mask (type,
4717 (TYPE_PRECISION (type)
4718 - tree_to_uhwi (pow2div)));
4720 return period;
4723 /* Returns the comparison operator used when eliminating the iv USE. */
4725 static enum tree_code
4726 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4728 struct loop *loop = data->current_loop;
4729 basic_block ex_bb;
4730 edge exit;
4732 ex_bb = gimple_bb (use->stmt);
4733 exit = EDGE_SUCC (ex_bb, 0);
4734 if (flow_bb_inside_loop_p (loop, exit->dest))
4735 exit = EDGE_SUCC (ex_bb, 1);
4737 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4740 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4741 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4742 calculation is performed in a non-wrapping type.
4744 TODO: More generally, we could test for the situation that
4745 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4746 This would require knowing the sign of OFFSET. */
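/* E.g. if BASE expands to SOMETHING + A and OFFSET also expands to A,
   then BASE - OFFSET is just SOMETHING, so the subtraction cannot wrap
   given that the computation is done in a non-wrapping type.  */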
4748 static bool
4749 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4751 enum tree_code code;
4752 tree e1, e2;
4753 aff_tree aff_e1, aff_e2, aff_offset;
4755 if (!nowrap_type_p (TREE_TYPE (base)))
4756 return false;
4758 base = expand_simple_operations (base);
4760 if (TREE_CODE (base) == SSA_NAME)
4762 gimple *stmt = SSA_NAME_DEF_STMT (base);
4764 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4765 return false;
4767 code = gimple_assign_rhs_code (stmt);
4768 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4769 return false;
4771 e1 = gimple_assign_rhs1 (stmt);
4772 e2 = gimple_assign_rhs2 (stmt);
4774 else
4776 code = TREE_CODE (base);
4777 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4778 return false;
4779 e1 = TREE_OPERAND (base, 0);
4780 e2 = TREE_OPERAND (base, 1);
4783 /* Use affine expansion as deeper inspection to prove the equality. */
4784 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4785 &aff_e2, &data->name_expansion_cache);
4786 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4787 &aff_offset, &data->name_expansion_cache);
4788 aff_combination_scale (&aff_offset, -1);
4789 switch (code)
4791 case PLUS_EXPR:
4792 aff_combination_add (&aff_e2, &aff_offset);
4793 if (aff_combination_zero_p (&aff_e2))
4794 return true;
4796 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4797 &aff_e1, &data->name_expansion_cache);
4798 aff_combination_add (&aff_e1, &aff_offset);
4799 return aff_combination_zero_p (&aff_e1);
4801 case POINTER_PLUS_EXPR:
4802 aff_combination_add (&aff_e2, &aff_offset);
4803 return aff_combination_zero_p (&aff_e2);
4805 default:
4806 return false;
4810 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4811 comparison with CAND. NITER describes the number of iterations of
4812 the loops. If successful, the comparison in COMP_P is altered accordingly.
4814 We aim to handle the following situation:
4816 sometype *base, *p;
4817 int a, b, i;
4819 i = a;
4820 p = p_0 = base + a;
4824 bla (*p);
4825 p++;
4826 i++;
4828 while (i < b);
4830 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4831 We aim to optimize this to
4833 p = p_0 = base + a;
4836 bla (*p);
4837 p++;
4839 while (p < p_0 - a + b);
4841 This preserves correctness, since the pointer arithmetic does not
4842 overflow. More precisely:
4844 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4845 overflow in computing it or the values of p.
4846 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4847 overflow. To prove this, we use the fact that p_0 = base + a. */
4849 static bool
4850 iv_elimination_compare_lt (struct ivopts_data *data,
4851 struct iv_cand *cand, enum tree_code *comp_p,
4852 struct tree_niter_desc *niter)
4854 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4855 struct aff_tree nit, tmpa, tmpb;
4856 enum tree_code comp;
4857 HOST_WIDE_INT step;
4859 /* We need to know that the candidate induction variable does not overflow.
4860 While more complex analysis may be used to prove this, for now just
4861 check that the variable appears in the original program and that it
4862 is computed in a type that guarantees no overflows. */
4863 cand_type = TREE_TYPE (cand->iv->base);
4864 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4865 return false;
4867 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4868 the calculation of the BOUND could overflow, making the comparison
4869 invalid. */
4870 if (!data->loop_single_exit_p)
4871 return false;
4873 /* We need to be able to decide whether candidate is increasing or decreasing
4874 in order to choose the right comparison operator. */
4875 if (!cst_and_fits_in_hwi (cand->iv->step))
4876 return false;
4877 step = int_cst_value (cand->iv->step);
4879 /* Check that the number of iterations matches the expected pattern:
4880 a + 1 > b ? 0 : b - a - 1. */
4881 mbz = niter->may_be_zero;
4882 if (TREE_CODE (mbz) == GT_EXPR)
4884 /* Handle a + 1 > b. */
4885 tree op0 = TREE_OPERAND (mbz, 0);
4886 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4888 a = TREE_OPERAND (op0, 0);
4889 b = TREE_OPERAND (mbz, 1);
4891 else
4892 return false;
4894 else if (TREE_CODE (mbz) == LT_EXPR)
4896 tree op1 = TREE_OPERAND (mbz, 1);
4898 /* Handle b < a + 1. */
4899 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4901 a = TREE_OPERAND (op1, 0);
4902 b = TREE_OPERAND (mbz, 0);
4904 else
4905 return false;
4907 else
4908 return false;
4910 /* Expected number of iterations is B - A - 1. Check that it matches
4911 the actual number, i.e., that B - A - NITER = 1. */
4912 tree_to_aff_combination (niter->niter, nit_type, &nit);
4913 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4914 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4915 aff_combination_scale (&nit, -1);
4916 aff_combination_scale (&tmpa, -1);
4917 aff_combination_add (&tmpb, &tmpa);
4918 aff_combination_add (&tmpb, &nit);
4919 if (tmpb.n != 0 || tmpb.offset != 1)
4920 return false;
4922 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4923 overflow. */
4924 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4925 cand->iv->step,
4926 fold_convert (TREE_TYPE (cand->iv->step), a));
4927 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4928 return false;
4930 /* Determine the new comparison operator. */
4931 comp = step < 0 ? GT_EXPR : LT_EXPR;
4932 if (*comp_p == NE_EXPR)
4933 *comp_p = comp;
4934 else if (*comp_p == EQ_EXPR)
4935 *comp_p = invert_tree_comparison (comp, false);
4936 else
4937 gcc_unreachable ();
4939 return true;
4942 /* Check whether it is possible to express the condition in USE by comparison
4943 of candidate CAND. If so, store the value it is compared with in
4944 BOUND, and the comparison operator in COMP. */
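/* E.g. for

     for (i = 0; i < n; i++)
       a[i] = 0;

   the exit test "i < n" may be replaced by a comparison of a pointer
   candidate against BOUND == &a[n], making I eliminable, provided the
   candidate's period covers the number of iterations as verified
   below.  */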
4946 static bool
4947 may_eliminate_iv (struct ivopts_data *data,
4948 struct iv_use *use, struct iv_cand *cand, tree *bound,
4949 enum tree_code *comp)
4951 basic_block ex_bb;
4952 edge exit;
4953 tree period;
4954 struct loop *loop = data->current_loop;
4955 aff_tree bnd;
4956 struct tree_niter_desc *desc = NULL;
4958 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4959 return false;
4961 /* For now works only for exits that dominate the loop latch.
4962 TODO: extend to other conditions inside loop body. */
4963 ex_bb = gimple_bb (use->stmt);
4964 if (use->stmt != last_stmt (ex_bb)
4965 || gimple_code (use->stmt) != GIMPLE_COND
4966 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4967 return false;
4969 exit = EDGE_SUCC (ex_bb, 0);
4970 if (flow_bb_inside_loop_p (loop, exit->dest))
4971 exit = EDGE_SUCC (ex_bb, 1);
4972 if (flow_bb_inside_loop_p (loop, exit->dest))
4973 return false;
4975 desc = niter_for_exit (data, exit);
4976 if (!desc)
4977 return false;
4979 /* Determine whether we can use the variable to test the exit condition.
4980 This is the case iff the period of the induction variable is greater
4981 than the number of iterations for which the exit condition is true. */
4982 period = iv_period (cand->iv);
4984 /* If the number of iterations is constant, compare against it directly. */
4985 if (TREE_CODE (desc->niter) == INTEGER_CST)
4987 /* See cand_value_at. */
4988 if (stmt_after_increment (loop, cand, use->stmt))
4990 if (!tree_int_cst_lt (desc->niter, period))
4991 return false;
4993 else
4995 if (tree_int_cst_lt (period, desc->niter))
4996 return false;
5000 /* If not, and if this is the only possible exit of the loop, see whether
5001 we can get a conservative estimate on the number of iterations of the
5002 entire loop and compare against that instead. */
5003 else
5005 widest_int period_value, max_niter;
5007 max_niter = desc->max;
5008 if (stmt_after_increment (loop, cand, use->stmt))
5009 max_niter += 1;
5010 period_value = wi::to_widest (period);
5011 if (wi::gtu_p (max_niter, period_value))
5013 /* See if we can take advantage of inferred loop bound
5014 information. */
5015 if (data->loop_single_exit_p)
5017 if (!max_loop_iterations (loop, &max_niter))
5018 return false;
5019 /* The loop bound is already adjusted by adding 1. */
5020 if (wi::gtu_p (max_niter, period_value))
5021 return false;
5023 else
5024 return false;
5028 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5030 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5031 aff_combination_to_tree (&bnd));
5032 *comp = iv_elimination_compare (data, use);
5034 /* It is unlikely that computing the number of iterations using division
5035 would be more profitable than keeping the original induction variable. */
5036 if (expression_expensive_p (*bound))
5037 return false;
5039 /* Sometimes it is possible to handle the situation that the number of
5040 iterations may be zero unless additional assumptions hold, by using
5041 < instead of != in the exit condition.
5043 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5044 base the exit condition on it. However, that is often too
5045 expensive. */
5046 if (!integer_zerop (desc->may_be_zero))
5047 return iv_elimination_compare_lt (data, cand, comp, desc);
5049 return true;
5052 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5053 be copied if it is used in the loop body and DATA->body_includes_call. */
5055 static int
5056 parm_decl_cost (struct ivopts_data *data, tree bound)
5058 tree sbound = bound;
5059 STRIP_NOPS (sbound);
5061 if (TREE_CODE (sbound) == SSA_NAME
5062 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5063 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5064 && data->body_includes_call)
5065 return COSTS_N_INSNS (1);
5067 return 0;
5070 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5072 static bool
5073 determine_group_iv_cost_cond (struct ivopts_data *data,
5074 struct iv_group *group, struct iv_cand *cand)
5076 tree bound = NULL_TREE;
5077 struct iv *cmp_iv;
5078 bitmap inv_exprs = NULL;
5079 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5080 comp_cost elim_cost, express_cost, cost, bound_cost;
5081 bool ok;
5082 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5083 tree *control_var, *bound_cst;
5084 enum tree_code comp = ERROR_MARK;
5085 struct iv_use *use = group->vuses[0];
5087 /* Try iv elimination. */
5088 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5090 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5091 if (elim_cost.cost == 0)
5092 elim_cost.cost = parm_decl_cost (data, bound);
5093 else if (TREE_CODE (bound) == INTEGER_CST)
5094 elim_cost.cost = 0;
5095 /* If we replace a loop condition 'i < n' with 'p < base + n',
5096 inv_vars_elim will have 'base' and 'n' set, which implies that both
5097 'base' and 'n' will be live during the loop. More likely,
5098 'base + n' will be loop invariant, resulting in only one live value
5099 during the loop. So in that case we clear inv_vars_elim and set
5100 inv_expr_elim instead. */
5101 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5103 inv_expr_elim = get_loop_invariant_expr (data, bound);
5104 bitmap_clear (inv_vars_elim);
5106 /* The bound is a loop invariant, so it will only be computed
5107 once. */
5108 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5110 else
5111 elim_cost = infinite_cost;
5113 /* Try expressing the original giv. If it is compared with an invariant,
5114 note that we cannot get rid of it. */
5115 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5116 NULL, &cmp_iv);
5117 gcc_assert (ok);
5119 /* When the condition is a comparison of the candidate IV against
5120 zero, prefer this IV.
5122 TODO: The constant that we're subtracting from the cost should
5123 be target-dependent. This information should be added to the
5124 target costs for each backend. */
5125 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5126 && integer_zerop (*bound_cst)
5127 && (operand_equal_p (*control_var, cand->var_after, 0)
5128 || operand_equal_p (*control_var, cand->var_before, 0)))
5129 elim_cost -= 1;
5131 express_cost = get_computation_cost (data, use, cand, false,
5132 &inv_vars_express, NULL,
5133 &inv_expr_express);
5134 if (cmp_iv != NULL)
5135 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5137 /* Count the cost of the original bound as well. */
5138 bound_cost = force_var_cost (data, *bound_cst, NULL);
5139 if (bound_cost.cost == 0)
5140 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5141 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5142 bound_cost.cost = 0;
5143 express_cost += bound_cost;
5145 /* Choose the better approach, preferring the eliminated IV. */
5146 if (elim_cost <= express_cost)
5148 cost = elim_cost;
5149 inv_vars = inv_vars_elim;
5150 inv_vars_elim = NULL;
5151 inv_expr = inv_expr_elim;
5153 else
5155 cost = express_cost;
5156 inv_vars = inv_vars_express;
5157 inv_vars_express = NULL;
5158 bound = NULL_TREE;
5159 comp = ERROR_MARK;
5160 inv_expr = inv_expr_express;
5163 if (inv_expr)
5165 inv_exprs = BITMAP_ALLOC (NULL);
5166 bitmap_set_bit (inv_exprs, inv_expr->id);
5168 set_group_iv_cost (data, group, cand, cost,
5169 inv_vars, bound, comp, inv_exprs);
5171 if (inv_vars_elim)
5172 BITMAP_FREE (inv_vars_elim);
5173 if (inv_vars_express)
5174 BITMAP_FREE (inv_vars_express);
5176 return !cost.infinite_cost_p ();
5179 /* Determines cost of computing uses in GROUP with CAND. Returns false
5180 if the use cannot be represented with CAND. */
5182 static bool
5183 determine_group_iv_cost (struct ivopts_data *data,
5184 struct iv_group *group, struct iv_cand *cand)
5186 switch (group->type)
5188 case USE_NONLINEAR_EXPR:
5189 return determine_group_iv_cost_generic (data, group, cand);
5191 case USE_ADDRESS:
5192 return determine_group_iv_cost_address (data, group, cand);
5194 case USE_COMPARE:
5195 return determine_group_iv_cost_cond (data, group, cand);
5197 default:
5198 gcc_unreachable ();
5202 /* Return true if get_computation_cost indicates that autoincrement is
5203 a possibility for the pair of USE and CAND, false otherwise. */
5205 static bool
5206 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5207 struct iv_cand *cand)
5209 bitmap inv_vars;
5210 bool can_autoinc;
5211 comp_cost cost;
5213 if (use->type != USE_ADDRESS)
5214 return false;
5216 cost = get_computation_cost (data, use, cand, true, &inv_vars,
5217 &can_autoinc, NULL);
5219 BITMAP_FREE (inv_vars);
5221 return !cost.infinite_cost_p () && can_autoinc;
5224 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5225 use that allows autoincrement, and set their AINC_USE if possible. */
5227 static void
5228 set_autoinc_for_original_candidates (struct ivopts_data *data)
5230 unsigned i, j;
5232 for (i = 0; i < data->vcands.length (); i++)
5234 struct iv_cand *cand = data->vcands[i];
5235 struct iv_use *closest_before = NULL;
5236 struct iv_use *closest_after = NULL;
5237 if (cand->pos != IP_ORIGINAL)
5238 continue;
5240 for (j = 0; j < data->vgroups.length (); j++)
5242 struct iv_group *group = data->vgroups[j];
5243 struct iv_use *use = group->vuses[0];
5244 unsigned uid = gimple_uid (use->stmt);
5246 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5247 continue;
5249 if (uid < gimple_uid (cand->incremented_at)
5250 && (closest_before == NULL
5251 || uid > gimple_uid (closest_before->stmt)))
5252 closest_before = use;
5254 if (uid > gimple_uid (cand->incremented_at)
5255 && (closest_after == NULL
5256 || uid < gimple_uid (closest_after->stmt)))
5257 closest_after = use;
5260 if (closest_before != NULL
5261 && autoinc_possible_for_pair (data, closest_before, cand))
5262 cand->ainc_use = closest_before;
5263 else if (closest_after != NULL
5264 && autoinc_possible_for_pair (data, closest_after, cand))
5265 cand->ainc_use = closest_after;
5269 /* Finds the candidates for the induction variables. */
5271 static void
5272 find_iv_candidates (struct ivopts_data *data)
5274 /* Add commonly used ivs. */
5275 add_standard_iv_candidates (data);
5277 /* Add old induction variables. */
5278 add_iv_candidate_for_bivs (data);
5280 /* Add induction variables derived from uses. */
5281 add_iv_candidate_for_groups (data);
5283 set_autoinc_for_original_candidates (data);
5285 /* Record the important candidates. */
5286 record_important_candidates (data);
5288 if (dump_file && (dump_flags & TDF_DETAILS))
5290 unsigned i;
5292 fprintf (dump_file, "\n<Important Candidates>:\t");
5293 for (i = 0; i < data->vcands.length (); i++)
5294 if (data->vcands[i]->important)
5295 fprintf (dump_file, " %d,", data->vcands[i]->id);
5296 fprintf (dump_file, "\n");
5298 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5299 for (i = 0; i < data->vgroups.length (); i++)
5301 struct iv_group *group = data->vgroups[i];
5303 if (group->related_cands)
5305 fprintf (dump_file, " Group %d:\t", group->id);
5306 dump_bitmap (dump_file, group->related_cands);
5309 fprintf (dump_file, "\n");
5313 /* Determines the cost of computing each group of uses with each iv candidate. */
5315 static void
5316 determine_group_iv_costs (struct ivopts_data *data)
5318 unsigned i, j;
5319 struct iv_cand *cand;
5320 struct iv_group *group;
5321 bitmap to_clear = BITMAP_ALLOC (NULL);
5323 alloc_use_cost_map (data);
5325 for (i = 0; i < data->vgroups.length (); i++)
5327 group = data->vgroups[i];
5329 if (data->consider_all_candidates)
5331 for (j = 0; j < data->vcands.length (); j++)
5333 cand = data->vcands[j];
5334 determine_group_iv_cost (data, group, cand);
5337 else
5339 bitmap_iterator bi;
5341 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5343 cand = data->vcands[j];
5344 if (!determine_group_iv_cost (data, group, cand))
5345 bitmap_set_bit (to_clear, j);
5348 /* Remove the candidates for which the cost is infinite from
5349 the list of related candidates. */
5350 bitmap_and_compl_into (group->related_cands, to_clear);
5351 bitmap_clear (to_clear);
5355 BITMAP_FREE (to_clear);
5357 if (dump_file && (dump_flags & TDF_DETAILS))
5359 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5360 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5362 for (hash_table<iv_inv_expr_hasher>::iterator it
5363 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5364 ++it)
5365 list.safe_push (*it);
5367 list.qsort (sort_iv_inv_expr_ent);
5369 for (i = 0; i < list.length (); ++i)
5371 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5372 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5373 fprintf (dump_file, "\n");
5376 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5378 for (i = 0; i < data->vgroups.length (); i++)
5380 group = data->vgroups[i];
5382 fprintf (dump_file, "Group %d:\n", i);
5383 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5384 for (j = 0; j < group->n_map_members; j++)
5386 if (!group->cost_map[j].cand
5387 || group->cost_map[j].cost.infinite_cost_p ())
5388 continue;
5390 fprintf (dump_file, " %d\t%d\t%d\t",
5391 group->cost_map[j].cand->id,
5392 group->cost_map[j].cost.cost,
5393 group->cost_map[j].cost.complexity);
5394 if (!group->cost_map[j].inv_exprs
5395 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5396 fprintf (dump_file, "NIL;\t");
5397 else
5398 bitmap_print (dump_file,
5399 group->cost_map[j].inv_exprs, "", ";\t");
5400 if (!group->cost_map[j].inv_vars
5401 || bitmap_empty_p (group->cost_map[j].inv_vars))
5402 fprintf (dump_file, "NIL;\n");
5403 else
5404 bitmap_print (dump_file,
5405 group->cost_map[j].inv_vars, "", "\n");
5408 fprintf (dump_file, "\n");
5410 fprintf (dump_file, "\n");
5414 /* Determines cost of the candidate CAND. */
5416 static void
5417 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5419 comp_cost cost_base;
5420 unsigned cost, cost_step;
5421 tree base;
5423 gcc_assert (cand->iv != NULL);
5425 /* There are two costs associated with the candidate -- its increment
5426 and its initialization. The second is almost negligible for any loop
5427 that rolls enough, so we take it into account only a little. */
5429 base = cand->iv->base;
5430 cost_base = force_var_cost (data, base, NULL);
5431 /* It will be exceptional that the iv register happens to be initialized with
5432 the proper value at no cost. In general, there will at least be a regcopy
5433 or a const set. */
5434 if (cost_base.cost == 0)
5435 cost_base.cost = COSTS_N_INSNS (1);
5436 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5438 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5440 /* Prefer the original ivs unless we may gain something by replacing them;
5441 the reason is to make debugging simpler. This is not relevant for
5442 artificial ivs created by other optimization passes. */
5443 if (cand->pos != IP_ORIGINAL
5444 || !SSA_NAME_VAR (cand->var_before)
5445 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5446 cost++;
5448 /* Prefer not to insert statements into latch unless there are some
5449 already (so that we do not create unnecessary jumps). */
5450 if (cand->pos == IP_END
5451 && empty_block_p (ip_end_pos (data->current_loop)))
5452 cost++;
5454 cand->cost = cost;
5455 cand->cost_step = cost_step;
5458 /* Determines costs of computation of the candidates. */
5460 static void
5461 determine_iv_costs (struct ivopts_data *data)
5463 unsigned i;
5465 if (dump_file && (dump_flags & TDF_DETAILS))
5467 fprintf (dump_file, "<Candidate Costs>:\n");
5468 fprintf (dump_file, " cand\tcost\n");
5471 for (i = 0; i < data->vcands.length (); i++)
5473 struct iv_cand *cand = data->vcands[i];
5475 determine_iv_cost (data, cand);
5477 if (dump_file && (dump_flags & TDF_DETAILS))
5478 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5481 if (dump_file && (dump_flags & TDF_DETAILS))
5482 fprintf (dump_file, "\n");
5485 /* Calculates cost for having N_REGS registers. This number includes
5486 induction variables, invariant variables and invariant expressions. */
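/* E.g. once N_REGS exceeds the registers available for the loop, the
   estimate from estimate_reg_pressure_cost starts growing roughly by
   the spill cost per additional register; adding N_REGS itself biases
   the search towards smaller IV sets even before that point.  */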
5488 static unsigned
5489 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned n_regs)
5491 unsigned cost = estimate_reg_pressure_cost (n_regs,
5492 data->regs_used, data->speed,
5493 data->body_includes_call);
5494 /* Add n_regs to the cost, so that we prefer eliminating ivs if possible. */
5495 return n_regs + cost;
5498 /* For each size of the induction variable set determine the penalty. */
5500 static void
5501 determine_set_costs (struct ivopts_data *data)
5503 unsigned j, n;
5504 gphi *phi;
5505 gphi_iterator psi;
5506 tree op;
5507 struct loop *loop = data->current_loop;
5508 bitmap_iterator bi;
5510 if (dump_file && (dump_flags & TDF_DETAILS))
5512 fprintf (dump_file, "<Global Costs>:\n");
5513 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5514 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5515 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5516 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5519 n = 0;
5520 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5522 phi = psi.phi ();
5523 op = PHI_RESULT (phi);
5525 if (virtual_operand_p (op))
5526 continue;
5528 if (get_iv (data, op))
5529 continue;
5531 n++;
5534 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5536 struct version_info *info = ver_info (data, j);
5538 if (info->inv_id && info->has_nonlin_use)
5539 n++;
5542 data->regs_used = n;
5543 if (dump_file && (dump_flags & TDF_DETAILS))
5544 fprintf (dump_file, " regs_used %d\n", n);
5546 if (dump_file && (dump_flags & TDF_DETAILS))
5548 fprintf (dump_file, " cost for size:\n");
5549 fprintf (dump_file, " ivs\tcost\n");
5550 for (j = 0; j <= 2 * target_avail_regs; j++)
5551 fprintf (dump_file, " %d\t%d\n", j,
5552 ivopts_global_cost_for_size (data, j));
5553 fprintf (dump_file, "\n");
5557 /* Returns true if A is a cheaper cost pair than B. */
5559 static bool
5560 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5562 if (!a)
5563 return false;
5565 if (!b)
5566 return true;
5568 if (a->cost < b->cost)
5569 return true;
5571 if (b->cost < a->cost)
5572 return false;
5574 /* In case the costs are the same, prefer the cheaper candidate. */
5575 if (a->cand->cost < b->cand->cost)
5576 return true;
5578 return false;
5582 /* Returns the cost pair by which GROUP is expressed in IVS. */
5584 static struct cost_pair *
5585 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5587 return ivs->cand_for_group[group->id];
5590 /* Computes the cost field of IVS structure. */
5592 static void
5593 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5595 comp_cost cost = ivs->cand_use_cost;
5597 cost += ivs->cand_cost;
5598 cost += ivopts_global_cost_for_size (data, ivs->n_invs + ivs->n_cands);
5599 ivs->cost = cost;
5602 /* Remove uses of the invariants in set INVS by decreasing their counters
5603 in N_INV_USES, updating IVS->n_invs accordingly. */
5605 static void
5606 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5608 bitmap_iterator bi;
5609 unsigned iid;
5611 if (!invs)
5612 return;
5614 gcc_assert (n_inv_uses != NULL);
5615 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5617 n_inv_uses[iid]--;
5618 if (n_inv_uses[iid] == 0)
5619 ivs->n_invs--;
5623 /* Set GROUP not to be expressed by any candidate in IVS. */
5625 static void
5626 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5627 struct iv_group *group)
5629 unsigned gid = group->id, cid;
5630 struct cost_pair *cp;
5632 cp = ivs->cand_for_group[gid];
5633 if (!cp)
5634 return;
5635 cid = cp->cand->id;
5637 ivs->bad_groups++;
5638 ivs->cand_for_group[gid] = NULL;
5639 ivs->n_cand_uses[cid]--;
5641 if (ivs->n_cand_uses[cid] == 0)
5643 bitmap_clear_bit (ivs->cands, cid);
5644 ivs->n_cands--;
5645 ivs->cand_cost -= cp->cand->cost;
5646 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5649 ivs->cand_use_cost -= cp->cost;
5650 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5651 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5652 iv_ca_recount_cost (data, ivs);
5655 /* Add uses of the invariants in set INVS by increasing their counters in
5656 N_INV_USES, updating IVS->n_invs accordingly. */
5658 static void
5659 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5661 bitmap_iterator bi;
5662 unsigned iid;
5664 if (!invs)
5665 return;
5667 gcc_assert (n_inv_uses != NULL);
5668 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5670 n_inv_uses[iid]++;
5671 if (n_inv_uses[iid] == 1)
5672 ivs->n_invs++;
5676 /* Set cost pair for GROUP in set IVS to CP. */
5678 static void
5679 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5680 struct iv_group *group, struct cost_pair *cp)
5682 unsigned gid = group->id, cid;
5684 if (ivs->cand_for_group[gid] == cp)
5685 return;
5687 if (ivs->cand_for_group[gid])
5688 iv_ca_set_no_cp (data, ivs, group);
5690 if (cp)
5692 cid = cp->cand->id;
5694 ivs->bad_groups--;
5695 ivs->cand_for_group[gid] = cp;
5696 ivs->n_cand_uses[cid]++;
5697 if (ivs->n_cand_uses[cid] == 1)
5699 bitmap_set_bit (ivs->cands, cid);
5700 ivs->n_cands++;
5701 ivs->cand_cost += cp->cand->cost;
5702 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5705 ivs->cand_use_cost += cp->cost;
5706 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5707 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5708 iv_ca_recount_cost (data, ivs);
5712 /* Extend set IVS by expressing GROUP by some of the candidates in it
5713 if possible. Consider all important candidates if the candidates in
5714 set IVS don't give any result. */
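/* E.g. while building the initial assignment group by group, a group
   that no already-chosen candidate can express falls back to the
   important candidates; if even those fail, the group stays unassigned
   and the cost of the whole assignment is infinite.  */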
5716 static void
5717 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_group *group)
5720 struct cost_pair *best_cp = NULL, *cp;
5721 bitmap_iterator bi;
5722 unsigned i;
5723 struct iv_cand *cand;
5725 gcc_assert (ivs->upto >= group->id);
5726 ivs->upto++;
5727 ivs->bad_groups++;
5729 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5731 cand = data->vcands[i];
5732 cp = get_group_iv_cost (data, group, cand);
5733 if (cheaper_cost_pair (cp, best_cp))
5734 best_cp = cp;
5737 if (best_cp == NULL)
5739 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5741 cand = data->vcands[i];
5742 cp = get_group_iv_cost (data, group, cand);
5743 if (cheaper_cost_pair (cp, best_cp))
5744 best_cp = cp;
5748 iv_ca_set_cp (data, ivs, group, best_cp);
5751 /* Get cost for assignment IVS. */
5753 static comp_cost
5754 iv_ca_cost (struct iv_ca *ivs)
5756 /* This was a conditional expression but it triggered a bug in
5757 Sun C 5.5. */
5758 if (ivs->bad_groups)
5759 return infinite_cost;
5760 else
5761 return ivs->cost;
5764 /* Returns true if applying NEW_CP to GROUP for IVS introduces more
5765 invariants than OLD_CP. */
5767 static bool
5768 iv_ca_more_deps (struct ivopts_data *data, struct iv_ca *ivs,
5769 struct iv_group *group, struct cost_pair *old_cp,
5770 struct cost_pair *new_cp)
5772 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5773 unsigned old_n_invs = ivs->n_invs;
5774 iv_ca_set_cp (data, ivs, group, new_cp);
5775 unsigned new_n_invs = ivs->n_invs;
5776 iv_ca_set_cp (data, ivs, group, old_cp);
5778 return (new_n_invs > old_n_invs);
5781 /* Creates a change expressing GROUP by NEW_CP instead of OLD_CP and chains
5782 it before NEXT. */
5784 static struct iv_ca_delta *
5785 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5786 struct cost_pair *new_cp, struct iv_ca_delta *next)
5788 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5790 change->group = group;
5791 change->old_cp = old_cp;
5792 change->new_cp = new_cp;
5793 change->next = next;
5795 return change;
5798 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5799 are rewritten. */
5801 static struct iv_ca_delta *
5802 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5804 struct iv_ca_delta *last;
5806 if (!l2)
5807 return l1;
5809 if (!l1)
5810 return l2;
5812 for (last = l1; last->next; last = last->next)
5813 continue;
5814 last->next = l2;
5816 return l1;
5819 /* Reverse the list of changes DELTA, forming the inverse of it. */
5821 static struct iv_ca_delta *
5822 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5824 struct iv_ca_delta *act, *next, *prev = NULL;
5826 for (act = delta; act; act = next)
5828 next = act->next;
5829 act->next = prev;
5830 prev = act;
5832 std::swap (act->old_cp, act->new_cp);
5835 return prev;
5838 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5839 reverted instead. */
5841 static void
5842 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5843 struct iv_ca_delta *delta, bool forward)
5845 struct cost_pair *from, *to;
5846 struct iv_ca_delta *act;
5848 if (!forward)
5849 delta = iv_ca_delta_reverse (delta);
5851 for (act = delta; act; act = act->next)
5853 from = act->old_cp;
5854 to = act->new_cp;
5855 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5856 iv_ca_set_cp (data, ivs, act->group, to);
5859 if (!forward)
5860 iv_ca_delta_reverse (delta);
5863 /* Returns true if CAND is used in IVS. */
5865 static bool
5866 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5868 return ivs->n_cand_uses[cand->id] > 0;
5871 /* Returns number of induction variable candidates in the set IVS. */
5873 static unsigned
5874 iv_ca_n_cands (struct iv_ca *ivs)
5876 return ivs->n_cands;
5879 /* Free the list of changes DELTA. */
5881 static void
5882 iv_ca_delta_free (struct iv_ca_delta **delta)
5884 struct iv_ca_delta *act, *next;
5886 for (act = *delta; act; act = next)
5888 next = act->next;
5889 free (act);
5892 *delta = NULL;
5895 /* Allocates a new iv candidate assignment. */
5897 static struct iv_ca *
5898 iv_ca_new (struct ivopts_data *data)
5900 struct iv_ca *nw = XNEW (struct iv_ca);
5902 nw->upto = 0;
5903 nw->bad_groups = 0;
5904 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
5905 data->vgroups.length ());
5906 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
5907 nw->cands = BITMAP_ALLOC (NULL);
5908 nw->n_cands = 0;
5909 nw->n_invs = 0;
5910 nw->cand_use_cost = no_cost;
5911 nw->cand_cost = 0;
5912 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
5913 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
5914 nw->cost = no_cost;
5916 return nw;
5919 /* Free memory occupied by the set IVS. */
5921 static void
5922 iv_ca_free (struct iv_ca **ivs)
5924 free ((*ivs)->cand_for_group);
5925 free ((*ivs)->n_cand_uses);
5926 BITMAP_FREE ((*ivs)->cands);
5927 free ((*ivs)->n_inv_var_uses);
5928 free ((*ivs)->n_inv_expr_uses);
5929 free (*ivs);
5930 *ivs = NULL;
5933 /* Dumps IVS to FILE. */
5935 static void
5936 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5938 unsigned i;
5939 comp_cost cost = iv_ca_cost (ivs);
5941 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
5942 cost.complexity);
5943 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
5944 ivs->cand_cost, ivs->cand_use_cost.cost,
5945 ivs->cand_use_cost.complexity);
5946 bitmap_print (file, ivs->cands, " candidates: ","\n");
5948 for (i = 0; i < ivs->upto; i++)
5950 struct iv_group *group = data->vgroups[i];
5951 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
5952 if (cp)
5953 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
5954 group->id, cp->cand->id, cp->cost.cost,
5955 cp->cost.complexity);
5956 else
5957 fprintf (file, " group:%d --> ??\n", group->id);
5960 const char *pref = "";
5961 fprintf (file, " invariant variables: ");
5962 for (i = 1; i <= data->max_inv_var_id; i++)
5963 if (ivs->n_inv_var_uses[i])
5965 fprintf (file, "%s%d", pref, i);
5966 pref = ", ";
5969 pref = "";
5970 fprintf (file, "\n invariant expressions: ");
5971 for (i = 1; i <= data->max_inv_expr_id; i++)
5972 if (ivs->n_inv_expr_uses[i])
5974 fprintf (file, "%s%d", pref, i);
5975 pref = ", ";
5978 fprintf (file, "\n\n");
5981 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5982 of the new set, and store the differences in DELTA. The number of
5983 induction variables in the new set is stored to N_IVS. When MIN_NCAND is
5984 true, the function tries to find a solution with minimal iv candidates. */
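/* A typical caller uses this as a what-if query, e.g.

     acost = iv_ca_extend (data, ivs, cand, &delta, NULL, false);
     if (acost < iv_ca_cost (ivs))
       iv_ca_delta_commit (data, ivs, delta, true);
     iv_ca_delta_free (&delta);

   which is a sketch of the pattern used elsewhere in this file: the
   function itself commits the deltas only temporarily to measure the
   cost and rolls them back before returning.  */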
5986 static comp_cost
5987 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5988 struct iv_cand *cand, struct iv_ca_delta **delta,
5989 unsigned *n_ivs, bool min_ncand)
5991 unsigned i;
5992 comp_cost cost;
5993 struct iv_group *group;
5994 struct cost_pair *old_cp, *new_cp;
5996 *delta = NULL;
5997 for (i = 0; i < ivs->upto; i++)
5999 group = data->vgroups[i];
6000 old_cp = iv_ca_cand_for_group (ivs, group);
6002 if (old_cp
6003 && old_cp->cand == cand)
6004 continue;
6006 new_cp = get_group_iv_cost (data, group, cand);
6007 if (!new_cp)
6008 continue;
6010 if (!min_ncand && iv_ca_more_deps (data, ivs, group, old_cp, new_cp))
6011 continue;
6013 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6014 continue;
6016 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6019 iv_ca_delta_commit (data, ivs, *delta, true);
6020 cost = iv_ca_cost (ivs);
6021 if (n_ivs)
6022 *n_ivs = iv_ca_n_cands (ivs);
6023 iv_ca_delta_commit (data, ivs, *delta, false);
6025 return cost;
6028 /* Try narrowing set IVS by removing CAND. Return the cost of
6029 the new set and store the differences in DELTA. START is
6030 the candidate with which we start narrowing. */
6032 static comp_cost
6033 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6034 struct iv_cand *cand, struct iv_cand *start,
6035 struct iv_ca_delta **delta)
6037 unsigned i, ci;
6038 struct iv_group *group;
6039 struct cost_pair *old_cp, *new_cp, *cp;
6040 bitmap_iterator bi;
6041 struct iv_cand *cnd;
6042 comp_cost cost, best_cost, acost;
6044 *delta = NULL;
6045 for (i = 0; i < data->vgroups.length (); i++)
6047 group = data->vgroups[i];
6049 old_cp = iv_ca_cand_for_group (ivs, group);
6050 if (old_cp->cand != cand)
6051 continue;
6053 best_cost = iv_ca_cost (ivs);
6054 /* Start narrowing with START. */
6055 new_cp = get_group_iv_cost (data, group, start);
6057 if (data->consider_all_candidates)
6059 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6061 if (ci == cand->id || (start && ci == start->id))
6062 continue;
6064 cnd = data->vcands[ci];
6066 cp = get_group_iv_cost (data, group, cnd);
6067 if (!cp)
6068 continue;
6070 iv_ca_set_cp (data, ivs, group, cp);
6071 acost = iv_ca_cost (ivs);
6073 if (acost < best_cost)
6075 best_cost = acost;
6076 new_cp = cp;
6080 else
6082 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6084 if (ci == cand->id || (start && ci == start->id))
6085 continue;
6087 cnd = data->vcands[ci];
6089 cp = get_group_iv_cost (data, group, cnd);
6090 if (!cp)
6091 continue;
6093 iv_ca_set_cp (data, ivs, group, cp);
6094 acost = iv_ca_cost (ivs);
6096 if (acost < best_cost)
6098 best_cost = acost;
6099 new_cp = cp;
6103 /* Restore the old cost pair for this group. */
6104 iv_ca_set_cp (data, ivs, group, old_cp);
6106 if (!new_cp)
6108 iv_ca_delta_free (delta);
6109 return infinite_cost;
6112 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6115 iv_ca_delta_commit (data, ivs, *delta, true);
6116 cost = iv_ca_cost (ivs);
6117 iv_ca_delta_commit (data, ivs, *delta, false);
6119 return cost;
6122 /* Try optimizing the set of candidates IVS by removing candidates other
6123 than EXCEPT_CAND from it. Return the cost of the new set, and store
6124 differences in DELTA. */
6126 static comp_cost
6127 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6128 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6130 bitmap_iterator bi;
6131 struct iv_ca_delta *act_delta, *best_delta;
6132 unsigned i;
6133 comp_cost best_cost, acost;
6134 struct iv_cand *cand;
6136 best_delta = NULL;
6137 best_cost = iv_ca_cost (ivs);
6139 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6141 cand = data->vcands[i];
6143 if (cand == except_cand)
6144 continue;
6146 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6148 if (acost < best_cost)
6150 best_cost = acost;
6151 iv_ca_delta_free (&best_delta);
6152 best_delta = act_delta;
6154 else
6155 iv_ca_delta_free (&act_delta);
6158 if (!best_delta)
6160 *delta = NULL;
6161 return best_cost;
6164 /* Recurse to possibly remove other unnecessary ivs. */
6165 iv_ca_delta_commit (data, ivs, best_delta, true);
6166 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6167 iv_ca_delta_commit (data, ivs, best_delta, false);
6168 *delta = iv_ca_delta_join (best_delta, *delta);
6169 return best_cost;
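/* Worked example for the recursion (candidate numbers are made up):
   with IVS = {c1, c2, c3}, suppose narrowing first moves the only group
   of c2 over to c3.  Only after c2 is gone may the groups of c1 also be
   representable by c3 cheaply enough, so the recursive call can remove
   c1 as well, leaving {c3}.  A single non-recursive pass would keep c1,
   since c1 was not removable while c2 was still in the set.  */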
6172 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6173 cheaper local cost for GROUP than BEST_CP. If so, return a pointer
6174 to the corresponding cost_pair; otherwise just return BEST_CP. */
6176 static struct cost_pair*
6177 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6178 unsigned int cand_idx, struct iv_cand *old_cand,
6179 struct cost_pair *best_cp)
6181 struct iv_cand *cand;
6182 struct cost_pair *cp;
6184 gcc_assert (old_cand != NULL && best_cp != NULL);
6185 if (cand_idx == old_cand->id)
6186 return best_cp;
6188 cand = data->vcands[cand_idx];
6189 cp = get_group_iv_cost (data, group, cand);
6190 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6191 return cp;
6193 return best_cp;
6196 /* Try breaking the locally optimal fixed point for IVS by replacing
6197 candidates that are used by more than one iv use. For each such
6198 candidate, this function tries to represent the iv uses under that
6199 candidate using other candidates with lower local cost, then tries
6200 to prune the new set. If the new set has lower cost, it returns the
6201 new cost after recording the candidate replacement in list DELTA. */
6203 static comp_cost
6204 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6205 struct iv_ca_delta **delta)
6207 bitmap_iterator bi, bj;
6208 unsigned int i, j, k;
6209 struct iv_cand *cand;
6210 comp_cost orig_cost, acost;
6211 struct iv_ca_delta *act_delta, *tmp_delta;
6212 struct cost_pair *old_cp, *best_cp = NULL;
6214 *delta = NULL;
6215 orig_cost = iv_ca_cost (ivs);
6217 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6219 if (ivs->n_cand_uses[i] == 1
6220 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6221 continue;
6223 cand = data->vcands[i];
6225 act_delta = NULL;
6226 /* Represent uses under current candidate using other ones with
6227 lower local cost. */
6228 for (j = 0; j < ivs->upto; j++)
6230 struct iv_group *group = data->vgroups[j];
6231 old_cp = iv_ca_cand_for_group (ivs, group);
6233 if (old_cp->cand != cand)
6234 continue;
6236 best_cp = old_cp;
6237 if (data->consider_all_candidates)
6238 for (k = 0; k < data->vcands.length (); k++)
6239 best_cp = cheaper_cost_with_cand (data, group, k,
6240 old_cp->cand, best_cp);
6241 else
6242 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6243 best_cp = cheaper_cost_with_cand (data, group, k,
6244 old_cp->cand, best_cp);
6246 if (best_cp == old_cp)
6247 continue;
6249 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6251 /* No need for further pruning. */
6252 if (!act_delta)
6253 continue;
6255 /* Prune the new candidate set. */
6256 iv_ca_delta_commit (data, ivs, act_delta, true);
6257 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6258 iv_ca_delta_commit (data, ivs, act_delta, false);
6259 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6261 if (acost < orig_cost)
6263 *delta = act_delta;
6264 return acost;
6266 else
6267 iv_ca_delta_free (&act_delta);
6270 return orig_cost;
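/* Worked example (hypothetical): let candidate c1 serve groups g1 and
   g2 in a locally optimal set.  Neither extending nor pruning improves
   the cost, because eliminating c1 requires changing both groups at
   once.  Rerouting g1 to c2 and g2 to c3 first, each with lower local
   cost, and then pruning can reach the cheaper set {c2, c3}, which a
   single extend or narrow step cannot.  */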
6273 /* Tries to extend the set IVS in the best possible way in order to
6274 express the GROUP. If ORIGINALP is true, prefer candidates from
6275 the original set of IVs, otherwise favor important candidates not
6276 based on any memory object. */
6278 static bool
6279 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6280 struct iv_group *group, bool originalp)
6282 comp_cost best_cost, act_cost;
6283 unsigned i;
6284 bitmap_iterator bi;
6285 struct iv_cand *cand;
6286 struct iv_ca_delta *best_delta = NULL, *act_delta;
6287 struct cost_pair *cp;
6289 iv_ca_add_group (data, ivs, group);
6290 best_cost = iv_ca_cost (ivs);
6291 cp = iv_ca_cand_for_group (ivs, group);
6292 if (cp)
6294 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6295 iv_ca_set_no_cp (data, ivs, group);
6298 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6299 first try important candidates not based on any memory object. Only if
6300 this fails, try the specific ones. Rationale -- in loops with many
6301 variables the best choice often is to use just one generic biv. If we
6302 added here many ivs specific to the uses, the optimization algorithm later
6303 would be likely to get stuck in a local minimum, thus causing us to create
6304 too many ivs. The approach from few ivs to more seems more likely to be
6305 successful -- starting from few ivs, replacing an expensive use by a
6306 specific iv should always be a win. */
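  /* For example (hypothetical), for the uses a[i], b[i] and i < n a
     single generic biv for i can serve all groups; seeding the set with
     a separate pointer iv per array use instead tends to trap the
     search in a local minimum with three ivs.  */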
6307 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6309 cand = data->vcands[i];
6311 if (originalp && cand->pos != IP_ORIGINAL)
6312 continue;
6314 if (!originalp && cand->iv->base_object != NULL_TREE)
6315 continue;
6317 if (iv_ca_cand_used_p (ivs, cand))
6318 continue;
6320 cp = get_group_iv_cost (data, group, cand);
6321 if (!cp)
6322 continue;
6324 iv_ca_set_cp (data, ivs, group, cp);
6325 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6326 true);
6327 iv_ca_set_no_cp (data, ivs, group);
6328 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6330 if (act_cost < best_cost)
6332 best_cost = act_cost;
6334 iv_ca_delta_free (&best_delta);
6335 best_delta = act_delta;
6337 else
6338 iv_ca_delta_free (&act_delta);
6341 if (best_cost.infinite_cost_p ())
6343 for (i = 0; i < group->n_map_members; i++)
6345 cp = group->cost_map + i;
6346 cand = cp->cand;
6347 if (!cand)
6348 continue;
6350 /* Already tried this. */
6351 if (cand->important)
6353 if (originalp && cand->pos == IP_ORIGINAL)
6354 continue;
6355 if (!originalp && cand->iv->base_object == NULL_TREE)
6356 continue;
6359 if (iv_ca_cand_used_p (ivs, cand))
6360 continue;
6362 act_delta = NULL;
6363 iv_ca_set_cp (data, ivs, group, cp);
6364 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6365 iv_ca_set_no_cp (data, ivs, group);
6366 act_delta = iv_ca_delta_add (group,
6367 iv_ca_cand_for_group (ivs, group),
6368 cp, act_delta);
6370 if (act_cost < best_cost)
6372 best_cost = act_cost;
6374 if (best_delta)
6375 iv_ca_delta_free (&best_delta);
6376 best_delta = act_delta;
6378 else
6379 iv_ca_delta_free (&act_delta);
6383 iv_ca_delta_commit (data, ivs, best_delta, true);
6384 iv_ca_delta_free (&best_delta);
6386 return !best_cost.infinite_cost_p ();
6389 /* Finds an initial assignment of candidates to uses. */
6391 static struct iv_ca *
6392 get_initial_solution (struct ivopts_data *data, bool originalp)
6394 unsigned i;
6395 struct iv_ca *ivs = iv_ca_new (data);
6397 for (i = 0; i < data->vgroups.length (); i++)
6398 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6400 iv_ca_free (&ivs);
6401 return NULL;
6404 return ivs;
6407 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6408 points to a bool variable; if it is true, this function tries to break
6409 the locally optimal fixed point by replacing candidates in IVS. */
6411 static bool
6412 try_improve_iv_set (struct ivopts_data *data,
6413 struct iv_ca *ivs, bool *try_replace_p)
6415 unsigned i, n_ivs;
6416 comp_cost acost, best_cost = iv_ca_cost (ivs);
6417 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6418 struct iv_cand *cand;
6420 /* Try extending the set of induction variables by one. */
6421 for (i = 0; i < data->vcands.length (); i++)
6423 cand = data->vcands[i];
6425 if (iv_ca_cand_used_p (ivs, cand))
6426 continue;
6428 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6429 if (!act_delta)
6430 continue;
6432 /* If we successfully added the candidate and the set is small enough,
6433 try optimizing it by removing other candidates. */
6434 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6436 iv_ca_delta_commit (data, ivs, act_delta, true);
6437 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6438 iv_ca_delta_commit (data, ivs, act_delta, false);
6439 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6442 if (acost < best_cost)
6444 best_cost = acost;
6445 iv_ca_delta_free (&best_delta);
6446 best_delta = act_delta;
6448 else
6449 iv_ca_delta_free (&act_delta);
6452 if (!best_delta)
6454 /* Try removing the candidates from the set instead. */
6455 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6457 if (!best_delta && *try_replace_p)
6459 *try_replace_p = false;
6460 /* So far the candidate selection algorithm tends to choose few IVs,
6461 so that it can handle loops that have many variables but for
6462 which the best choice is often to use only one general biv. One
6463 weakness is that it cannot handle the opposite case, in which
6464 different candidates should be chosen for different uses. To
6465 solve this problem, we replace candidates as described in the
6466 comment of iv_ca_replace, thus giving the general algorithm a
6467 chance to break the locally optimal fixed point in these cases. */
6468 best_cost = iv_ca_replace (data, ivs, &best_delta);
6471 if (!best_delta)
6472 return false;
6475 iv_ca_delta_commit (data, ivs, best_delta, true);
6476 gcc_assert (best_cost == iv_ca_cost (ivs));
6477 iv_ca_delta_free (&best_delta);
6478 return true;
6481 /* Attempts to find the optimal set of induction variables. We use a
6482 simple greedy heuristic: we try to replace at most one candidate in the
6483 selected solution and remove the unused ivs while this improves the cost. */
6485 static struct iv_ca *
6486 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6488 struct iv_ca *set;
6489 bool try_replace_p = true;
6491 /* Get the initial solution. */
6492 set = get_initial_solution (data, originalp);
6493 if (!set)
6495 if (dump_file && (dump_flags & TDF_DETAILS))
6496 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6497 return NULL;
6500 if (dump_file && (dump_flags & TDF_DETAILS))
6502 fprintf (dump_file, "Initial set of candidates:\n");
6503 iv_ca_dump (data, dump_file, set);
6506 while (try_improve_iv_set (data, set, &try_replace_p))
6508 if (dump_file && (dump_flags & TDF_DETAILS))
6510 fprintf (dump_file, "Improved to:\n");
6511 iv_ca_dump (data, dump_file, set);
6515 return set;
6518 static struct iv_ca *
6519 find_optimal_iv_set (struct ivopts_data *data)
6521 unsigned i;
6522 comp_cost cost, origcost;
6523 struct iv_ca *set, *origset;
6525 /* Determine the cost based on a strategy that starts with the original
6526 IVs, and try again using a strategy that prefers candidates not
6527 based on any memory object. */
6528 origset = find_optimal_iv_set_1 (data, true);
6529 set = find_optimal_iv_set_1 (data, false);
6531 if (!origset && !set)
6532 return NULL;
6534 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6535 cost = set ? iv_ca_cost (set) : infinite_cost;
6537 if (dump_file && (dump_flags & TDF_DETAILS))
6539 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6540 origcost.cost, origcost.complexity);
6541 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6542 cost.cost, cost.complexity);
6545 /* Choose the one with the best cost. */
6546 if (origcost <= cost)
6548 if (set)
6549 iv_ca_free (&set);
6550 set = origset;
6552 else if (origset)
6553 iv_ca_free (&origset);
6555 for (i = 0; i < data->vgroups.length (); i++)
6557 struct iv_group *group = data->vgroups[i];
6558 group->selected = iv_ca_cand_for_group (set, group)->cand;
6561 return set;
6564 /* Creates a new induction variable corresponding to CAND. */
6566 static void
6567 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6569 gimple_stmt_iterator incr_pos;
6570 tree base;
6571 struct iv_use *use;
6572 struct iv_group *group;
6573 bool after = false;
6575 gcc_assert (cand->iv != NULL);
6577 switch (cand->pos)
6579 case IP_NORMAL:
6580 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6581 break;
6583 case IP_END:
6584 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6585 after = true;
6586 break;
6588 case IP_AFTER_USE:
6589 after = true;
6590 /* fall through */
6591 case IP_BEFORE_USE:
6592 incr_pos = gsi_for_stmt (cand->incremented_at);
6593 break;
6595 case IP_ORIGINAL:
6596 /* Mark that the iv is preserved. */
6597 name_info (data, cand->var_before)->preserve_biv = true;
6598 name_info (data, cand->var_after)->preserve_biv = true;
6600 /* Rewrite the increment so that it uses var_before directly. */
6601 use = find_interesting_uses_op (data, cand->var_after);
6602 group = data->vgroups[use->group_id];
6603 group->selected = cand;
6604 return;
6607 gimple_add_tmp_var (cand->var_before);
6609 base = unshare_expr (cand->iv->base);
6611 create_iv (base, unshare_expr (cand->iv->step),
6612 cand->var_before, data->current_loop,
6613 &incr_pos, after, &cand->var_before, &cand->var_after);
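/* Placement sketch (illustrative GIMPLE, names made up): for IP_NORMAL
   the increment is emitted immediately before the exit test, e.g.

     ivtmp.5 = ivtmp.4 + 4;
     if (ivtmp.5 < _bound)
       goto <loop>;

   so that the exit condition can use the incremented value.  */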
6616 /* Creates new induction variables described in SET. */
6618 static void
6619 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6621 unsigned i;
6622 struct iv_cand *cand;
6623 bitmap_iterator bi;
6625 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6627 cand = data->vcands[i];
6628 create_new_iv (data, cand);
6631 if (dump_file && (dump_flags & TDF_DETAILS))
6633 fprintf (dump_file, "Selected IV set for loop %d",
6634 data->current_loop->num);
6635 if (data->loop_loc != UNKNOWN_LOCATION)
6636 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6637 LOCATION_LINE (data->loop_loc));
6638 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6639 avg_loop_niter (data->current_loop));
6640 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6641 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6643 cand = data->vcands[i];
6644 dump_cand (dump_file, cand);
6646 fprintf (dump_file, "\n");
6650 /* Rewrites USE (definition of iv used in a nonlinear expression)
6651 using candidate CAND. */
6653 static void
6654 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6655 struct iv_use *use, struct iv_cand *cand)
6657 tree comp;
6658 tree tgt;
6659 gassign *ass;
6660 gimple_stmt_iterator bsi;
6662 /* An important special case -- if we are asked to express value of
6663 the original iv by itself, just exit; there is no need to
6664 introduce a new computation (that might also need casting the
6665 variable to unsigned and back). */
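  /* E.g. (illustrative): when USE is the original increment

       i_1 = i_0 + 1;

     and CAND is the original biv incremented by that very statement, we
     keep it as is instead of re-expressing it through a cast to
     unsigned and back.  */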
6666 if (cand->pos == IP_ORIGINAL
6667 && cand->incremented_at == use->stmt)
6669 tree op = NULL_TREE;
6670 enum tree_code stmt_code;
6672 gcc_assert (is_gimple_assign (use->stmt));
6673 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6675 /* Check whether we may leave the computation unchanged.
6676 This is the case only if it does not rely on other
6677 computations in the loop -- otherwise, the computation
6678 we rely upon may be removed in remove_unused_ivs,
6679 thus leading to ICE. */
6680 stmt_code = gimple_assign_rhs_code (use->stmt);
6681 if (stmt_code == PLUS_EXPR
6682 || stmt_code == MINUS_EXPR
6683 || stmt_code == POINTER_PLUS_EXPR)
6685 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6686 op = gimple_assign_rhs2 (use->stmt);
6687 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6688 op = gimple_assign_rhs1 (use->stmt);
6691 if (op != NULL_TREE)
6693 if (expr_invariant_in_loop_p (data->current_loop, op))
6694 return;
6695 if (TREE_CODE (op) == SSA_NAME)
6697 struct iv *iv = get_iv (data, op);
6698 if (iv != NULL && integer_zerop (iv->step))
6699 return;
6704 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
6705 gcc_assert (comp != NULL_TREE);
6707 switch (gimple_code (use->stmt))
6709 case GIMPLE_PHI:
6710 tgt = PHI_RESULT (use->stmt);
6712 /* If we should keep the biv, do not replace it. */
6713 if (name_info (data, tgt)->preserve_biv)
6714 return;
6716 bsi = gsi_after_labels (gimple_bb (use->stmt));
6717 break;
6719 case GIMPLE_ASSIGN:
6720 tgt = gimple_assign_lhs (use->stmt);
6721 bsi = gsi_for_stmt (use->stmt);
6722 break;
6724 default:
6725 gcc_unreachable ();
6728 if (!valid_gimple_rhs_p (comp)
6729 || (gimple_code (use->stmt) != GIMPLE_PHI
6730 /* We can't allow re-allocating the stmt as it might still be
6731 pointed to. */
6732 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6733 >= gimple_num_ops (gsi_stmt (bsi)))))
6735 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6736 true, GSI_SAME_STMT);
6737 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6739 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6740 /* As this isn't a plain copy we have to reset alignment
6741 information. */
6742 if (SSA_NAME_PTR_INFO (comp))
6743 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6747 if (gimple_code (use->stmt) == GIMPLE_PHI)
6749 ass = gimple_build_assign (tgt, comp);
6750 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6752 bsi = gsi_for_stmt (use->stmt);
6753 remove_phi_node (&bsi, false);
6755 else
6757 gimple_assign_set_rhs_from_tree (&bsi, comp);
6758 use->stmt = gsi_stmt (bsi);
6762 /* Performs a peephole optimization to reorder the iv update statement with
6763 a mem ref to enable instruction combining in later phases. The mem ref uses
6764 the iv value before the update, so the reordering transformation requires
6765 adjustment of the offset. CAND is the selected IV_CAND.
6767 Example:
6769 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6770 iv2 = iv1 + 1;
6772 if (t < val) (1)
6773 goto L;
6774 goto Head;
6777 directly propagating t over to (1) will introduce an overlapping live
6778 range, thus increasing register pressure. This peephole transforms it into:
6781 iv2 = iv1 + 1;
6782 t = MEM_REF (base, iv2, 8, 8);
6783 if (t < val)
6784 goto L;
6785 goto Head;
6788 static void
6789 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6791 tree var_after;
6792 gimple *iv_update, *stmt;
6793 basic_block bb;
6794 gimple_stmt_iterator gsi, gsi_iv;
6796 if (cand->pos != IP_NORMAL)
6797 return;
6799 var_after = cand->var_after;
6800 iv_update = SSA_NAME_DEF_STMT (var_after);
6802 bb = gimple_bb (iv_update);
6803 gsi = gsi_last_nondebug_bb (bb);
6804 stmt = gsi_stmt (gsi);
6806 /* Only handle conditional statements for now. */
6807 if (gimple_code (stmt) != GIMPLE_COND)
6808 return;
6810 gsi_prev_nondebug (&gsi);
6811 stmt = gsi_stmt (gsi);
6812 if (stmt != iv_update)
6813 return;
6815 gsi_prev_nondebug (&gsi);
6816 if (gsi_end_p (gsi))
6817 return;
6819 stmt = gsi_stmt (gsi);
6820 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6821 return;
6823 if (stmt != use->stmt)
6824 return;
6826 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6827 return;
6829 if (dump_file && (dump_flags & TDF_DETAILS))
6831 fprintf (dump_file, "Reordering \n");
6832 print_gimple_stmt (dump_file, iv_update, 0, 0);
6833 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6834 fprintf (dump_file, "\n");
6837 gsi = gsi_for_stmt (use->stmt);
6838 gsi_iv = gsi_for_stmt (iv_update);
6839 gsi_move_before (&gsi_iv, &gsi);
6841 cand->pos = IP_BEFORE_USE;
6842 cand->incremented_at = use->stmt;
6845 /* Rewrites USE (address that is an iv) using candidate CAND. */
6847 static void
6848 rewrite_use_address (struct ivopts_data *data,
6849 struct iv_use *use, struct iv_cand *cand)
6851 aff_tree aff;
6852 bool ok;
6854 adjust_iv_update_pos (cand, use);
6855 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
6856 gcc_assert (ok);
6857 unshare_aff_combination (&aff);
6859 /* To avoid undefined overflow problems, all IV candidates use unsigned
6860 integer types. The drawback is that this makes it impossible for
6861 create_mem_ref to distinguish an IV that is based on a memory object
6862 from one that represents simply an offset.
6864 To work around this problem, we pass a hint to create_mem_ref that
6865 indicates which variable (if any) in aff is an IV based on a memory
6866 object. Note that we only consider the candidate. If this is not
6867 based on an object, the base of the reference is in some subexpression
6868 of the use -- but these will use pointer types, so they are recognized
6869 by the create_mem_ref heuristics anyway. */
6870 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
6871 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
6872 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6873 tree type = TREE_TYPE (*use->op_p);
6874 unsigned int align = get_object_alignment (*use->op_p);
6875 if (align != TYPE_ALIGN (type))
6876 type = build_aligned_type (type, align);
6878 tree ref = create_mem_ref (&bsi, type, &aff,
6879 reference_alias_ptr_type (*use->op_p),
6880 iv, base_hint, data->speed);
6882 copy_ref_info (ref, *use->op_p);
6883 *use->op_p = ref;
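/* Illustrative effect of the hint: for a dereference of a pointer iv
   based on array A, BASE_HINT lets create_mem_ref keep A as the base of
   the resulting TARGET_MEM_REF instead of treating the whole unsigned
   address computation as an opaque offset, which usually yields a
   better addressing mode.  */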
6886 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6887 candidate CAND. */
6889 static void
6890 rewrite_use_compare (struct ivopts_data *data,
6891 struct iv_use *use, struct iv_cand *cand)
6893 tree comp, *var_p, op, bound;
6894 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6895 enum tree_code compare;
6896 struct iv_group *group = data->vgroups[use->group_id];
6897 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
6898 bool ok;
6900 bound = cp->value;
6901 if (bound)
6903 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6904 tree var_type = TREE_TYPE (var);
6905 gimple_seq stmts;
6907 if (dump_file && (dump_flags & TDF_DETAILS))
6909 fprintf (dump_file, "Replacing exit test: ");
6910 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6912 compare = cp->comp;
6913 bound = unshare_expr (fold_convert (var_type, bound));
6914 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6915 if (stmts)
6916 gsi_insert_seq_on_edge_immediate (
6917 loop_preheader_edge (data->current_loop),
6918 stmts);
6920 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6921 gimple_cond_set_lhs (cond_stmt, var);
6922 gimple_cond_set_code (cond_stmt, compare);
6923 gimple_cond_set_rhs (cond_stmt, op);
6924 return;
6927 /* The induction variable elimination failed; just express the original
6928 giv. */
6929 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
6930 gcc_assert (comp != NULL_TREE);
6932 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6933 gcc_assert (ok);
6935 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6936 true, GSI_SAME_STMT);
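/* Illustrative example (made-up SSA names): an exit test

     if (i_1 < n_2)

   served by a candidate ivtmp.3 stepping by 4 may become

     if (ivtmp.3 < _bound)

   with _bound = n_2 * 4 computed once on the preheader edge; this is
   the classic strength-reduced exit test.  */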
6939 /* Rewrite the groups using the selected induction variables. */
6941 static void
6942 rewrite_groups (struct ivopts_data *data)
6944 unsigned i, j;
6946 for (i = 0; i < data->vgroups.length (); i++)
6948 struct iv_group *group = data->vgroups[i];
6949 struct iv_cand *cand = group->selected;
6951 gcc_assert (cand);
6953 if (group->type == USE_NONLINEAR_EXPR)
6955 for (j = 0; j < group->vuses.length (); j++)
6957 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
6958 update_stmt (group->vuses[j]->stmt);
6961 else if (group->type == USE_ADDRESS)
6963 for (j = 0; j < group->vuses.length (); j++)
6965 rewrite_use_address (data, group->vuses[j], cand);
6966 update_stmt (group->vuses[j]->stmt);
6969 else
6971 gcc_assert (group->type == USE_COMPARE);
6973 for (j = 0; j < group->vuses.length (); j++)
6975 rewrite_use_compare (data, group->vuses[j], cand);
6976 update_stmt (group->vuses[j]->stmt);
6982 /* Removes the ivs that are not used after rewriting. */
6984 static void
6985 remove_unused_ivs (struct ivopts_data *data)
6987 unsigned j;
6988 bitmap_iterator bi;
6989 bitmap toremove = BITMAP_ALLOC (NULL);
6991 /* Figure out an order in which to release SSA DEFs so that we don't
6992 release something that we'd have to propagate into a debug stmt
6993 afterwards. */
6994 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6996 struct version_info *info;
6998 info = ver_info (data, j);
6999 if (info->iv
7000 && !integer_zerop (info->iv->step)
7001 && !info->inv_id
7002 && !info->iv->nonlin_use
7003 && !info->preserve_biv)
7005 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7007 tree def = info->iv->ssa_name;
7009 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7011 imm_use_iterator imm_iter;
7012 use_operand_p use_p;
7013 gimple *stmt;
7014 int count = 0;
7016 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7018 if (!gimple_debug_bind_p (stmt))
7019 continue;
7021 /* We just want to determine whether to do nothing
7022 (count == 0), to substitute the computed
7023 expression into a single use of the SSA DEF by
7024 itself (count == 1), or to use a debug temp
7025 because the SSA DEF is used multiple times or as
7026 part of a larger expression (count > 1). */
7027 count++;
7028 if (gimple_debug_bind_get_value (stmt) != def)
7029 count++;
7031 if (count > 1)
7032 BREAK_FROM_IMM_USE_STMT (imm_iter);
7035 if (!count)
7036 continue;
7038 struct iv_use dummy_use;
7039 struct iv_cand *best_cand = NULL, *cand;
7040 unsigned i, best_pref = 0, cand_pref;
7042 memset (&dummy_use, 0, sizeof (dummy_use));
7043 dummy_use.iv = info->iv;
7044 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7046 cand = data->vgroups[i]->selected;
7047 if (cand == best_cand)
7048 continue;
7049 cand_pref = operand_equal_p (cand->iv->step,
7050 info->iv->step, 0)
7051 ? 4 : 0;
7052 cand_pref
7053 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7054 == TYPE_MODE (TREE_TYPE (info->iv->base))
7055 ? 2 : 0;
7056 cand_pref
7057 += TREE_CODE (cand->iv->base) == INTEGER_CST
7058 ? 1 : 0;
7059 if (best_cand == NULL || best_pref < cand_pref)
7061 best_cand = cand;
7062 best_pref = cand_pref;
7066 if (!best_cand)
7067 continue;
7069 tree comp = get_computation_at (data->current_loop,
7070 SSA_NAME_DEF_STMT (def),
7071 &dummy_use, best_cand);
7072 if (!comp)
7073 continue;
7075 if (count > 1)
7077 tree vexpr = make_node (DEBUG_EXPR_DECL);
7078 DECL_ARTIFICIAL (vexpr) = 1;
7079 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7080 if (SSA_NAME_VAR (def))
7081 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7082 else
7083 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7084 gdebug *def_temp
7085 = gimple_build_debug_bind (vexpr, comp, NULL);
7086 gimple_stmt_iterator gsi;
7088 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7089 gsi = gsi_after_labels (gimple_bb
7090 (SSA_NAME_DEF_STMT (def)));
7091 else
7092 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7094 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7095 comp = vexpr;
7098 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7100 if (!gimple_debug_bind_p (stmt))
7101 continue;
7103 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7104 SET_USE (use_p, comp);
7106 update_stmt (stmt);
7112 release_defs_bitset (toremove);
7114 BITMAP_FREE (toremove);
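/* Illustrative example (made-up names): if counter i is removed in
   favor of a pointer iv p with the same step, a debug bind

     # DEBUG i => i_3

   can be rewritten as

     # DEBUG i => (int) (p_7 - &a[0])

   so the value of i stays available to the debugger even though no code
   computes it anymore.  */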
7117 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7118 for hash_map::traverse. */
7120 bool
7121 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7123 free (value);
7124 return true;
7127 /* Frees data allocated by the optimization of a single loop. */
7129 static void
7130 free_loop_data (struct ivopts_data *data)
7132 unsigned i, j;
7133 bitmap_iterator bi;
7134 tree obj;
7136 if (data->niters)
7138 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7139 delete data->niters;
7140 data->niters = NULL;
7143 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7145 struct version_info *info;
7147 info = ver_info (data, i);
7148 info->iv = NULL;
7149 info->has_nonlin_use = false;
7150 info->preserve_biv = false;
7151 info->inv_id = 0;
7153 bitmap_clear (data->relevant);
7154 bitmap_clear (data->important_candidates);
7156 for (i = 0; i < data->vgroups.length (); i++)
7158 struct iv_group *group = data->vgroups[i];
7160 for (j = 0; j < group->vuses.length (); j++)
7161 free (group->vuses[j]);
7162 group->vuses.release ();
7164 BITMAP_FREE (group->related_cands);
7165 for (j = 0; j < group->n_map_members; j++)
7167 if (group->cost_map[j].inv_vars)
7168 BITMAP_FREE (group->cost_map[j].inv_vars);
7169 if (group->cost_map[j].inv_exprs)
7170 BITMAP_FREE (group->cost_map[j].inv_exprs);
7173 free (group->cost_map);
7174 free (group);
7176 data->vgroups.truncate (0);
7178 for (i = 0; i < data->vcands.length (); i++)
7180 struct iv_cand *cand = data->vcands[i];
7182 if (cand->inv_vars)
7183 BITMAP_FREE (cand->inv_vars);
7184 free (cand);
7186 data->vcands.truncate (0);
7188 if (data->version_info_size < num_ssa_names)
7190 data->version_info_size = 2 * num_ssa_names;
7191 free (data->version_info);
7192 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7195 data->max_inv_var_id = 0;
7196 data->max_inv_expr_id = 0;
7198 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7199 SET_DECL_RTL (obj, NULL_RTX);
7201 decl_rtl_to_reset.truncate (0);
7203 data->inv_expr_tab->empty ();
7205 data->iv_common_cand_tab->empty ();
7206 data->iv_common_cands.truncate (0);
7209 /* Finalizes the data structures used by the iv optimization pass,
7210 stored in DATA. */
7212 static void
7213 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7215 free_loop_data (data);
7216 free (data->version_info);
7217 BITMAP_FREE (data->relevant);
7218 BITMAP_FREE (data->important_candidates);
7220 decl_rtl_to_reset.release ();
7221 data->vgroups.release ();
7222 data->vcands.release ();
7223 delete data->inv_expr_tab;
7224 data->inv_expr_tab = NULL;
7225 free_affine_expand_cache (&data->name_expansion_cache);
7226 delete data->iv_common_cand_tab;
7227 data->iv_common_cand_tab = NULL;
7228 data->iv_common_cands.release ();
7229 obstack_free (&data->iv_obstack, NULL);
7232 /* Returns true if the loop body BODY includes any function calls. */
7234 static bool
7235 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7237 gimple_stmt_iterator gsi;
7238 unsigned i;
7240 for (i = 0; i < num_nodes; i++)
7241 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7243 gimple *stmt = gsi_stmt (gsi);
7244 if (is_gimple_call (stmt)
7245 && !gimple_call_internal_p (stmt)
7246 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7247 return true;
7249 return false;
7252 /* Optimizes the LOOP. Returns true if anything changed. */
7254 static bool
7255 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7257 bool changed = false;
7258 struct iv_ca *iv_ca;
7259 edge exit = single_dom_exit (loop);
7260 basic_block *body;
7262 gcc_assert (!data->niters);
7263 data->current_loop = loop;
7264 data->loop_loc = find_loop_location (loop);
7265 data->speed = optimize_loop_for_speed_p (loop);
7267 if (dump_file && (dump_flags & TDF_DETAILS))
7269 fprintf (dump_file, "Processing loop %d", loop->num);
7270 if (data->loop_loc != UNKNOWN_LOCATION)
7271 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7272 LOCATION_LINE (data->loop_loc));
7273 fprintf (dump_file, "\n");
7275 if (exit)
7277 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7278 exit->src->index, exit->dest->index);
7279 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7280 fprintf (dump_file, "\n");
7283 fprintf (dump_file, "\n");
7286 body = get_loop_body (loop);
7287 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7288 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7289 free (body);
7291 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7293 /* For each ssa name determines whether it behaves as an induction variable
7294 in some loop. */
7295 if (!find_induction_variables (data))
7296 goto finish;
7298 /* Finds interesting uses (item 1). */
7299 find_interesting_uses (data);
7300 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7301 goto finish;
7303 /* Finds candidates for the induction variables (item 2). */
7304 find_iv_candidates (data);
7306 /* Calculates the costs (item 3, part 1). */
7307 determine_iv_costs (data);
7308 determine_group_iv_costs (data);
7309 determine_set_costs (data);
7311 /* Find the optimal set of induction variables (item 3, part 2). */
7312 iv_ca = find_optimal_iv_set (data);
7313 if (!iv_ca)
7314 goto finish;
7315 changed = true;
7317 /* Create the new induction variables (item 4, part 1). */
7318 create_new_ivs (data, iv_ca);
7319 iv_ca_free (&iv_ca);
7321 /* Rewrite the uses (item 4, part 2). */
7322 rewrite_groups (data);
7324 /* Remove the ivs that are unused after rewriting. */
7325 remove_unused_ivs (data);
7327 /* We have changed the structure of induction variables; it might happen
7328 that definitions in the scev database refer to some of them that were
7329 eliminated. */
7330 scev_reset ();
7332 finish:
7333 free_loop_data (data);
7335 return changed;
7338 /* Main entry point. Optimizes induction variables in loops. */
7340 void
7341 tree_ssa_iv_optimize (void)
7343 struct loop *loop;
7344 struct ivopts_data data;
7346 tree_ssa_iv_optimize_init (&data);
7348 /* Optimize the loops starting with the innermost ones. */
7349 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7351 if (dump_file && (dump_flags & TDF_DETAILS))
7352 flow_loop_dump (loop, dump_file, NULL, 1);
7354 tree_ssa_iv_optimize_loop (&data, loop);
7357 tree_ssa_iv_optimize_finalize (&data);
7360 #include "gt-tree-ssa-loop-ivopts.h"