gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
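
/* As an illustrative sketch (an added example, not part of the pass
   description above): given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a[i] is an interesting use and i is both an old induction
   variable and a candidate; strength reduction may end up rewriting the
   loop to step a pointer instead:

     for (p = a; p < a + n; p++)
       *p = 0;

   Here a, i, n and p are hypothetical variables used for illustration
   only.  */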
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
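
/* For illustration (a hypothetical example, not taken from the sources):
   an integer variable iterating over 0, 4, 8, ... would be described by
   an iv with base 0, step 4, base_object NULL (it is not a pointer) and
   biv_p set if it is incremented directly by the loop; a pointer
   iterating over &a[0], &a[1], ... would instead carry base_object &a.  */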
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
};

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
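
/* A sketch of the difference between the two common positions
   (illustrative only, using a hypothetical counter i): with IP_NORMAL
   the increment is emitted just before the exit condition,

     i' = i + 1;
     if (i' < n)
       goto loop;

   whereas with IP_END the increment is placed at the very end of the
   latch block, after the exit test has already consumed the old value
   of i.  */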
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
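
/* For example (illustrative, with a hypothetical array A): for the
   expression &A[i] the base address &A is returned (converted to
   ptr_type_node); an INTEGER_CST yields NULL_TREE; and a pointer whose
   origin cannot be analyzed further falls through to the default case
   and is returned, converted to ptr_type_node, as its own base.  */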
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
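
/* E.g. (hypothetical operands): for &a[0] the ADDR_EXPR operand is an
   ARRAY_REF rather than a DECL_P node, so the function returns true,
   while for plain &a the operand is the declaration itself and it
   returns false.  This is what makes alloc_iv below lower &a[0] while
   leaving &a alone.  */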
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var appearing in the
	 step, but since that is expensive and such cases are unusual, we
	 just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var appearing in the
     step, but since that is expensive and such cases are unusual, we
     just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For an address type use, ADDR_BASE is the stripped IV base and
   ADDR_OFFSET is the const offset stripped from the IV base.  For uses
   of other types, ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
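
/* As an illustration (hypothetical gimple): for a condition i < n where
   i is an induction variable and n is loop invariant, *CONTROL_VAR ends
   up pointing at i, *BOUND at n, and true is returned; for n1 < n2 with
   both operands invariant, the operands are still stored but false is
   returned.  */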
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from which
   EXPR is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
/* Record that BIV, together with its predecessor and successor ivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
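
/* Two illustrative cases (hypothetical operands): with TOP = i * 12 and
   BOT = i the MULT_EXPR case stores 12 to *MUL and returns true; with
   TOP = 16 and BOT = 4 the INTEGER_CST case stores 4 and succeeds
   because the remainder is zero, whereas TOP = 17 and BOT = 4 fails.  */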
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that one;
   otherwise it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
2207 /* Finds interesting uses of induction variables in the statement STMT. */
2209 static void
2210 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2212 struct iv *iv;
2213 tree op, *lhs, *rhs;
2214 ssa_op_iter iter;
2215 use_operand_p use_p;
2216 enum tree_code code;
2218 find_invariants_stmt (data, stmt);
2220 if (gimple_code (stmt) == GIMPLE_COND)
2222 find_interesting_uses_cond (data, stmt);
2223 return;
2226 if (is_gimple_assign (stmt))
2228 lhs = gimple_assign_lhs_ptr (stmt);
2229 rhs = gimple_assign_rhs1_ptr (stmt);
2231 if (TREE_CODE (*lhs) == SSA_NAME)
2233 /* If the statement defines an induction variable, the uses are not
2234 interesting by themselves. */
2236 iv = get_iv (data, *lhs);
2238 if (iv && !integer_zerop (iv->step))
2239 return;
2242 code = gimple_assign_rhs_code (stmt);
2243 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2244 && (REFERENCE_CLASS_P (*rhs)
2245 || is_gimple_val (*rhs)))
2247 if (REFERENCE_CLASS_P (*rhs))
2248 find_interesting_uses_address (data, stmt, rhs);
2249 else
2250 find_interesting_uses_op (data, *rhs);
2252 if (REFERENCE_CLASS_P (*lhs))
2253 find_interesting_uses_address (data, stmt, lhs);
2254 return;
2256 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2258 find_interesting_uses_cond (data, stmt);
2259 return;
2262 /* TODO -- we should also handle address uses of type
2264 memory = call (whatever);
2268 call (memory). */
2271 if (gimple_code (stmt) == GIMPLE_PHI
2272 && gimple_bb (stmt) == data->current_loop->header)
2274 iv = get_iv (data, PHI_RESULT (stmt));
2276 if (iv && !integer_zerop (iv->step))
2277 return;
2280 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2282 op = USE_FROM_PTR (use_p);
2284 if (TREE_CODE (op) != SSA_NAME)
2285 continue;
2287 iv = get_iv (data, op);
2288 if (!iv)
2289 continue;
2291 find_interesting_uses_op (data, op);
2295 /* Finds interesting uses of induction variables outside of loops
2296 on loop exit edge EXIT. */
2298 static void
2299 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2301 gphi *phi;
2302 gphi_iterator psi;
2303 tree def;
2305 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2307 phi = psi.phi ();
2308 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2309 if (!virtual_operand_p (def))
2310 find_interesting_uses_op (data, def);
2314 /* Finds uses of the induction variables that are interesting. */
2316 static void
2317 find_interesting_uses (struct ivopts_data *data)
2319 basic_block bb;
2320 gimple_stmt_iterator bsi;
2321 basic_block *body = get_loop_body (data->current_loop);
2322 unsigned i;
2323 struct version_info *info;
2324 edge e;
2326 if (dump_file && (dump_flags & TDF_DETAILS))
2327 fprintf (dump_file, "Uses:\n\n");
2329 for (i = 0; i < data->current_loop->num_nodes; i++)
2331 edge_iterator ei;
2332 bb = body[i];
2334 FOR_EACH_EDGE (e, ei, bb->succs)
2335 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2336 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2337 find_interesting_uses_outside (data, e);
2339 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2340 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2341 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2342 if (!is_gimple_debug (gsi_stmt (bsi)))
2343 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2346 if (dump_file && (dump_flags & TDF_DETAILS))
2348 bitmap_iterator bi;
2350 fprintf (dump_file, "\n");
2352 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2354 info = ver_info (data, i);
2355 if (info->inv_id)
2357 fprintf (dump_file, " ");
2358 print_generic_expr (dump_file, info->name, TDF_SLIM);
2359 fprintf (dump_file, " is invariant (%d)%s\n",
2360 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2364 fprintf (dump_file, "\n");
2367 free (body);
2370 /* Compute the maximum offset of the [base + offset] addressing mode
2371 for the memory reference represented by USE. */
2373 static HOST_WIDE_INT
2374 compute_max_addr_offset (struct iv_use *use)
2376 int width;
2377 rtx reg, addr;
2378 HOST_WIDE_INT i, off;
2379 unsigned list_index, num;
2380 addr_space_t as;
2381 machine_mode mem_mode, addr_mode;
2382 static vec<HOST_WIDE_INT> max_offset_list;
2384 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2385 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2387 num = max_offset_list.length ();
2388 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2389 if (list_index >= num)
2391 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2392 for (; num < max_offset_list.length (); num++)
2393 max_offset_list[num] = -1;
2396 off = max_offset_list[list_index];
2397 if (off != -1)
2398 return off;
2400 addr_mode = targetm.addr_space.address_mode (as);
2401 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2402 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2404 width = GET_MODE_BITSIZE (addr_mode) - 1;
2405 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2406 width = HOST_BITS_PER_WIDE_INT - 1;
2408 for (i = width; i > 0; i--)
2410 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2411 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2412 if (memory_address_addr_space_p (mem_mode, addr, as))
2413 break;
2415 /* For some strict-alignment targets, the offset must be naturally
2416 aligned. Try an aligned offset if mem_mode is not QImode. */
2417 off = ((unsigned HOST_WIDE_INT) 1 << i);
2418 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2420 off -= GET_MODE_SIZE (mem_mode);
2421 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2422 if (memory_address_addr_space_p (mem_mode, addr, as))
2423 break;
2426 if (i == 0)
2427 off = 0;
2429 max_offset_list[list_index] = off;
2430 return off;
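/* A minimal standalone sketch (not GCC internals) of the probing scheme
   above: walk offsets of the form (1 << i) - 1 from the widest candidate
   downwards and keep the first one the target accepts.  valid_offset_p is
   a hypothetical stand-in for memory_address_addr_space_p, and the 12-bit
   limit is only an example.  */

#include <stdbool.h>

static bool
valid_offset_p (long off)
{
  /* Hypothetical target: offsets must fit in a signed 12-bit field.  */
  return off >= -2048 && off <= 2047;
}

static long
max_addr_offset_sketch (int width)
{
  for (int i = width; i > 0; i--)
    {
      long off = (1L << i) - 1;
      if (valid_offset_p (off))
        return off;               /* 2047 for the predicate above */
    }
  return 0;
}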
2433 /* Check if all small groups should be split. Return true if and
2434 only if:
2436 1) At least one group contains two uses with different offsets.
2437 2) No group contains more than two uses with different offsets.
2439 Return false otherwise. We want to split such groups because:
2441 1) Small groups don't have much benefit and may interfere with
2442 general candidate selection.
2443 2) The problem size with only small groups is usually small, and the
2444 general algorithm can handle it well.
2446 TODO -- The above claim may not hold when auto-increment is supported. */
2448 static bool
2449 split_all_small_groups (struct ivopts_data *data)
2451 bool split_p = false;
2452 unsigned int i, n, distinct;
2453 struct iv_use *pre, *use;
2455 n = n_iv_uses (data);
2456 for (i = 0; i < n; i++)
2458 use = iv_use (data, i);
2459 if (!use->next)
2460 continue;
2462 distinct = 1;
2463 gcc_assert (use->type == USE_ADDRESS);
2464 for (pre = use, use = use->next; use; pre = use, use = use->next)
2466 if (pre->addr_offset != use->addr_offset)
2467 distinct++;
2469 if (distinct > 2)
2470 return false;
2472 if (distinct == 2)
2473 split_p = true;
2476 return split_p;
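/* A standalone sketch (not GCC internals) of the distinct-offset count
   performed above.  It counts offset transitions between neighbors, which
   equals the number of distinct offsets because the sub-use lists are
   maintained sorted by offset.  The struct name is made up for the
   example.  */

struct sub_sketch { unsigned long off; struct sub_sketch *next; };

static unsigned
distinct_offsets (struct sub_sketch *head)
{
  unsigned n = 1;
  for (struct sub_sketch *p = head; p && p->next; p = p->next)
    if (p->off != p->next->off)
      n++;
  return n;   /* the group above is "small" when this is exactly 2 */
}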
2479 /* For each group of address type uses, this function further groups
2480 these uses according to the maximum offset supported by the target's
2481 [base + offset] addressing mode. */
2483 static void
2484 group_address_uses (struct ivopts_data *data)
2486 HOST_WIDE_INT max_offset = -1;
2487 unsigned int i, n, sub_id;
2488 struct iv_use *pre, *use;
2489 unsigned HOST_WIDE_INT addr_offset_first;
2491 /* Reset max offset to split all small groups. */
2492 if (split_all_small_groups (data))
2493 max_offset = 0;
2495 n = n_iv_uses (data);
2496 for (i = 0; i < n; i++)
2498 use = iv_use (data, i);
2499 if (!use->next)
2500 continue;
2502 gcc_assert (use->type == USE_ADDRESS);
2503 if (max_offset != 0)
2504 max_offset = compute_max_addr_offset (use);
2506 while (use)
2508 sub_id = 0;
2509 addr_offset_first = use->addr_offset;
2510 /* Only uses whose offset from the first use fits in the offset part
2511 of the addressing mode can be grouped together. */
2512 for (pre = use, use = use->next;
2513 use && (use->addr_offset - addr_offset_first
2514 <= (unsigned HOST_WIDE_INT) max_offset);
2515 pre = use, use = use->next)
2517 use->id = pre->id;
2518 use->sub_id = ++sub_id;
2521 /* Break the list and create a new group. */
2522 if (use)
2524 pre->next = NULL;
2525 use->id = n_iv_uses (data);
2526 use->related_cands = BITMAP_ALLOC (NULL);
2527 data->iv_uses.safe_push (use);
2532 if (dump_file && (dump_flags & TDF_DETAILS))
2533 dump_uses (dump_file, data);
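/* A standalone sketch (not GCC internals) of the regrouping done above.
   Given offsets sorted in ascending order, a new group starts whenever
   the distance from the group's first offset exceeds MAX_OFF, mirroring
   the inner loop of group_address_uses.  */

static unsigned
count_offset_groups (const unsigned long *off, unsigned n,
                     unsigned long max_off)
{
  unsigned groups = 0, i = 0;
  while (i < n)
    {
      unsigned long first = off[i];
      groups++;
      /* Everything within [first, first + max_off] shares the group.  */
      while (i < n && off[i] - first <= max_off)
        i++;
    }
  return groups;
}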
2536 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2537 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2538 we are at the top-level of the processed address. */
2540 static tree
2541 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2542 HOST_WIDE_INT *offset)
2544 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2545 enum tree_code code;
2546 tree type, orig_type = TREE_TYPE (expr);
2547 HOST_WIDE_INT off0, off1, st;
2548 tree orig_expr = expr;
2550 STRIP_NOPS (expr);
2552 type = TREE_TYPE (expr);
2553 code = TREE_CODE (expr);
2554 *offset = 0;
2556 switch (code)
2558 case INTEGER_CST:
2559 if (!cst_and_fits_in_hwi (expr)
2560 || integer_zerop (expr))
2561 return orig_expr;
2563 *offset = int_cst_value (expr);
2564 return build_int_cst (orig_type, 0);
2566 case POINTER_PLUS_EXPR:
2567 case PLUS_EXPR:
2568 case MINUS_EXPR:
2569 op0 = TREE_OPERAND (expr, 0);
2570 op1 = TREE_OPERAND (expr, 1);
2572 op0 = strip_offset_1 (op0, false, false, &off0);
2573 op1 = strip_offset_1 (op1, false, false, &off1);
2575 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2576 if (op0 == TREE_OPERAND (expr, 0)
2577 && op1 == TREE_OPERAND (expr, 1))
2578 return orig_expr;
2580 if (integer_zerop (op1))
2581 expr = op0;
2582 else if (integer_zerop (op0))
2584 if (code == MINUS_EXPR)
2585 expr = fold_build1 (NEGATE_EXPR, type, op1);
2586 else
2587 expr = op1;
2589 else
2590 expr = fold_build2 (code, type, op0, op1);
2592 return fold_convert (orig_type, expr);
2594 case MULT_EXPR:
2595 op1 = TREE_OPERAND (expr, 1);
2596 if (!cst_and_fits_in_hwi (op1))
2597 return orig_expr;
2599 op0 = TREE_OPERAND (expr, 0);
2600 op0 = strip_offset_1 (op0, false, false, &off0);
2601 if (op0 == TREE_OPERAND (expr, 0))
2602 return orig_expr;
2604 *offset = off0 * int_cst_value (op1);
2605 if (integer_zerop (op0))
2606 expr = op0;
2607 else
2608 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2610 return fold_convert (orig_type, expr);
2612 case ARRAY_REF:
2613 case ARRAY_RANGE_REF:
2614 if (!inside_addr)
2615 return orig_expr;
2617 step = array_ref_element_size (expr);
2618 if (!cst_and_fits_in_hwi (step))
2619 break;
2621 st = int_cst_value (step);
2622 op1 = TREE_OPERAND (expr, 1);
2623 op1 = strip_offset_1 (op1, false, false, &off1);
2624 *offset = off1 * st;
2626 if (top_compref
2627 && integer_zerop (op1))
2629 /* Strip the component reference completely. */
2630 op0 = TREE_OPERAND (expr, 0);
2631 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2632 *offset += off0;
2633 return op0;
2635 break;
2637 case COMPONENT_REF:
2639 tree field;
2641 if (!inside_addr)
2642 return orig_expr;
2644 tmp = component_ref_field_offset (expr);
2645 field = TREE_OPERAND (expr, 1);
2646 if (top_compref
2647 && cst_and_fits_in_hwi (tmp)
2648 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2650 HOST_WIDE_INT boffset, abs_off;
2652 /* Strip the component reference completely. */
2653 op0 = TREE_OPERAND (expr, 0);
2654 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2655 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2656 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2657 if (boffset < 0)
2658 abs_off = -abs_off;
2660 *offset = off0 + int_cst_value (tmp) + abs_off;
2661 return op0;
2664 break;
2666 case ADDR_EXPR:
2667 op0 = TREE_OPERAND (expr, 0);
2668 op0 = strip_offset_1 (op0, true, true, &off0);
2669 *offset += off0;
2671 if (op0 == TREE_OPERAND (expr, 0))
2672 return orig_expr;
2674 expr = build_fold_addr_expr (op0);
2675 return fold_convert (orig_type, expr);
2677 case MEM_REF:
2678 /* ??? Offset operand? */
2679 inside_addr = false;
2680 break;
2682 default:
2683 return orig_expr;
2686 /* Default handling of expressions for which we want to recurse into
2687 the first operand. */
2688 op0 = TREE_OPERAND (expr, 0);
2689 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2690 *offset += off0;
2692 if (op0 == TREE_OPERAND (expr, 0)
2693 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2694 return orig_expr;
2696 expr = copy_node (expr);
2697 TREE_OPERAND (expr, 0) = op0;
2698 if (op1)
2699 TREE_OPERAND (expr, 1) = op1;
2701 /* Inside an address, we might strip the top-level component references,
2702 thus changing the type of the expression. The handling of ADDR_EXPR
2703 will fix that. */
2704 expr = fold_convert (orig_type, expr);
2706 return expr;
2709 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2711 static tree
2712 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2714 HOST_WIDE_INT off;
2715 tree core = strip_offset_1 (expr, false, false, &off);
2716 *offset = off;
2717 return core;
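/* A standalone sketch (not GCC internals) of what the pair of functions
   above computes, on a concrete example.  For an address such as
   &a[i + 3] with 4-byte elements, the constant displacement is separated
   from the variable core, so two accesses differing only in the constant
   index share one stripped base.  The struct and names below are made up
   for illustration.  */

struct strip_sketch { long var_index; unsigned long offset; };

static struct strip_sketch
strip_offset_sketch (long var_index, long const_index,
                     unsigned long elt_size)
{
  struct strip_sketch s;
  s.var_index = var_index;             /* core: &a[i], kept symbolic */
  s.offset = const_index * elt_size;   /* constant part: 3 * 4 == 12 */
  return s;
}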
2720 /* Returns a variant of TYPE that can be used as the base for different
2721 uses. We return an unsigned type with the same precision, which avoids
2722 problems with overflows. */
2724 static tree
2725 generic_type_for (tree type)
2727 if (POINTER_TYPE_P (type))
2728 return unsigned_type_for (type);
2730 if (TYPE_UNSIGNED (type))
2731 return type;
2733 return unsigned_type_for (type);
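/* A standalone illustration (not GCC internals) of why an unsigned type
   is chosen here: signed overflow is undefined behavior in C, so a
   candidate computed in a signed type could let later passes assume it
   never wraps, while unsigned arithmetic wraps modulo 2^N by
   definition.  */

static unsigned int
wrapping_increment (unsigned int iv)
{
  /* Well defined: yields 0 when iv == UINT_MAX.  The same increment on
     a signed int holding INT_MAX would be undefined behavior.  */
  return iv + 1;
}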
2736 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2737 the bitmap in which we should store them. */
2739 static struct ivopts_data *fd_ivopts_data;
2740 static tree
2741 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2743 bitmap *depends_on = (bitmap *) data;
2744 struct version_info *info;
2746 if (TREE_CODE (*expr_p) != SSA_NAME)
2747 return NULL_TREE;
2748 info = name_info (fd_ivopts_data, *expr_p);
2750 if (!info->inv_id || info->has_nonlin_use)
2751 return NULL_TREE;
2753 if (!*depends_on)
2754 *depends_on = BITMAP_ALLOC (NULL);
2755 bitmap_set_bit (*depends_on, info->inv_id);
2757 return NULL_TREE;
2760 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2761 position to POS. If USE is not NULL, the candidate is set as related to
2762 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2763 replacement of the final value of the iv by a direct computation. */
2765 static struct iv_cand *
2766 add_candidate_1 (struct ivopts_data *data,
2767 tree base, tree step, bool important, enum iv_position pos,
2768 struct iv_use *use, gimple *incremented_at,
2769 struct iv *orig_iv = NULL)
2771 unsigned i;
2772 struct iv_cand *cand = NULL;
2773 tree type, orig_type;
2775 /* For non-original variables, make sure their values are computed in a type
2776 that does not invoke undefined behavior on overflows (since in general,
2777 we cannot prove that these induction variables are non-wrapping). */
2778 if (pos != IP_ORIGINAL)
2780 orig_type = TREE_TYPE (base);
2781 type = generic_type_for (orig_type);
2782 if (type != orig_type)
2784 base = fold_convert (type, base);
2785 step = fold_convert (type, step);
2789 for (i = 0; i < n_iv_cands (data); i++)
2791 cand = iv_cand (data, i);
2793 if (cand->pos != pos)
2794 continue;
2796 if (cand->incremented_at != incremented_at
2797 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2798 && cand->ainc_use != use))
2799 continue;
2801 if (!cand->iv)
2803 if (!base && !step)
2804 break;
2806 continue;
2809 if (!base && !step)
2810 continue;
2812 if (operand_equal_p (base, cand->iv->base, 0)
2813 && operand_equal_p (step, cand->iv->step, 0)
2814 && (TYPE_PRECISION (TREE_TYPE (base))
2815 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2816 break;
2819 if (i == n_iv_cands (data))
2821 cand = XCNEW (struct iv_cand);
2822 cand->id = i;
2824 if (!base && !step)
2825 cand->iv = NULL;
2826 else
2827 cand->iv = alloc_iv (data, base, step);
2829 cand->pos = pos;
2830 if (pos != IP_ORIGINAL && cand->iv)
2832 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2833 cand->var_after = cand->var_before;
2835 cand->important = important;
2836 cand->incremented_at = incremented_at;
2837 data->iv_candidates.safe_push (cand);
2839 if (step
2840 && TREE_CODE (step) != INTEGER_CST)
2842 fd_ivopts_data = data;
2843 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2846 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2847 cand->ainc_use = use;
2848 else
2849 cand->ainc_use = NULL;
2851 cand->orig_iv = orig_iv;
2852 if (dump_file && (dump_flags & TDF_DETAILS))
2853 dump_cand (dump_file, cand);
2856 if (important && !cand->important)
2858 cand->important = true;
2859 if (dump_file && (dump_flags & TDF_DETAILS))
2860 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2863 if (use)
2865 bitmap_set_bit (use->related_cands, i);
2866 if (dump_file && (dump_flags & TDF_DETAILS))
2867 fprintf (dump_file, "Candidate %d is related to use %d\n",
2868 cand->id, use->id);
2871 return cand;
2874 /* Returns true if incrementing the induction variable at the end of the LOOP
2875 is allowed.
2877 The purpose is to avoid splitting the latch edge with a biv increment,
2878 thus creating a jump, possibly confusing other optimization passes and
2879 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2880 is not available (so we do not have a better alternative), or if the latch
2881 edge is already nonempty. */
2883 static bool
2884 allow_ip_end_pos_p (struct loop *loop)
2886 if (!ip_normal_pos (loop))
2887 return true;
2889 if (!empty_block_p (ip_end_pos (loop)))
2890 return true;
2892 return false;
2895 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2896 Important field is set to IMPORTANT. */
2898 static void
2899 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2900 bool important, struct iv_use *use)
2902 basic_block use_bb = gimple_bb (use->stmt);
2903 machine_mode mem_mode;
2904 unsigned HOST_WIDE_INT cstepi;
2906 /* If we insert the increment in any position other than the standard
2907 ones, we must ensure that it is incremented once per iteration.
2908 It must not be in an inner nested loop, or one side of an if
2909 statement. */
2910 if (use_bb->loop_father != data->current_loop
2911 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2912 || stmt_could_throw_p (use->stmt)
2913 || !cst_and_fits_in_hwi (step))
2914 return;
2916 cstepi = int_cst_value (step);
2918 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2919 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2920 || USE_STORE_PRE_INCREMENT (mem_mode))
2921 && GET_MODE_SIZE (mem_mode) == cstepi)
2922 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2923 || USE_STORE_PRE_DECREMENT (mem_mode))
2924 && GET_MODE_SIZE (mem_mode) == -cstepi))
2926 enum tree_code code = MINUS_EXPR;
2927 tree new_base;
2928 tree new_step = step;
2930 if (POINTER_TYPE_P (TREE_TYPE (base)))
2932 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2933 code = POINTER_PLUS_EXPR;
2935 else
2936 new_step = fold_convert (TREE_TYPE (base), new_step);
2937 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2938 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2939 use->stmt);
2941 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2942 || USE_STORE_POST_INCREMENT (mem_mode))
2943 && GET_MODE_SIZE (mem_mode) == cstepi)
2944 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2945 || USE_STORE_POST_DECREMENT (mem_mode))
2946 && GET_MODE_SIZE (mem_mode) == -cstepi))
2948 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2949 use->stmt);
2953 /* Adds a candidate BASE + STEP * i. The important field is set to
2954 IMPORTANT. If USE is not NULL, the candidate is set as related to
2955 it. The candidate computation is scheduled before the exit condition
2956 and at the end of the loop. */
2958 static void
2959 add_candidate (struct ivopts_data *data,
2960 tree base, tree step, bool important, struct iv_use *use,
2961 struct iv *orig_iv = NULL)
2963 gcc_assert (use == NULL || use->sub_id == 0);
2965 if (ip_normal_pos (data->current_loop))
2966 add_candidate_1 (data, base, step, important,
2967 IP_NORMAL, use, NULL, orig_iv);
2968 if (ip_end_pos (data->current_loop)
2969 && allow_ip_end_pos_p (data->current_loop))
2970 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2973 /* Adds standard iv candidates. */
2975 static void
2976 add_standard_iv_candidates (struct ivopts_data *data)
2978 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2980 /* The same for a double-integer type if it is still fast enough. */
2981 if (TYPE_PRECISION
2982 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2983 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2984 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2985 build_int_cst (long_integer_type_node, 1), true, NULL);
2987 /* The same for a double-integer type if it is still fast enough. */
2988 if (TYPE_PRECISION
2989 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2990 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2991 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2992 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2996 /* Adds candidates based on the old induction variable IV. */
2998 static void
2999 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3001 gimple *phi;
3002 tree def;
3003 struct iv_cand *cand;
3005 /* Check if this biv is used in an address type use. */
3006 if (iv->no_overflow && iv->have_address_use
3007 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3008 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3010 tree base = fold_convert (sizetype, iv->base);
3011 tree step = fold_convert (sizetype, iv->step);
3013 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3014 add_candidate (data, base, step, true, NULL, iv);
3015 /* Add iv cand of the original type only if it has nonlinear use. */
3016 if (iv->have_use_for)
3017 add_candidate (data, iv->base, iv->step, true, NULL);
3019 else
3020 add_candidate (data, iv->base, iv->step, true, NULL);
3022 /* The same, but with initial value zero. */
3023 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3024 add_candidate (data, size_int (0), iv->step, true, NULL);
3025 else
3026 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3027 iv->step, true, NULL);
3029 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3030 if (gimple_code (phi) == GIMPLE_PHI)
3032 /* Additionally record the possibility of leaving the original iv
3033 untouched. */
3034 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3035 /* Don't add candidate if it's from another PHI node because
3036 it's an affine iv appearing in the form of PEELED_CHREC. */
3037 phi = SSA_NAME_DEF_STMT (def);
3038 if (gimple_code (phi) != GIMPLE_PHI)
3040 cand = add_candidate_1 (data,
3041 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3042 SSA_NAME_DEF_STMT (def));
3043 cand->var_before = iv->ssa_name;
3044 cand->var_after = def;
3046 else
3047 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3051 /* Adds candidates based on the old induction variables. */
3053 static void
3054 add_iv_candidate_for_bivs (struct ivopts_data *data)
3056 unsigned i;
3057 struct iv *iv;
3058 bitmap_iterator bi;
3060 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3062 iv = ver_info (data, i)->iv;
3063 if (iv && iv->biv_p && !integer_zerop (iv->step))
3064 add_iv_candidate_for_biv (data, iv);
3068 /* Adds candidates based on the value of USE's iv. */
3070 static void
3071 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3073 unsigned HOST_WIDE_INT offset;
3074 tree base;
3075 tree basetype;
3076 struct iv *iv = use->iv;
3078 add_candidate (data, iv->base, iv->step, false, use);
3080 /* The same, but with initial value zero. Make such a variable important,
3081 since it is generic enough that possibly many uses may be based
3082 on it. */
3083 basetype = TREE_TYPE (iv->base);
3084 if (POINTER_TYPE_P (basetype))
3085 basetype = sizetype;
3086 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3088 /* Third, try removing the constant offset. Make sure to even
3089 add a candidate for &a[0] vs. (T *)&a. */
3090 base = strip_offset (iv->base, &offset);
3091 if (offset || base != iv->base)
3092 add_candidate (data, base, iv->step, false, use);
3094 /* Finally, add auto-increment candidates. Make such variables
3095 important, since other iv uses with the same base object may be based
3096 on them. */
3097 if (use != NULL && use->type == USE_ADDRESS)
3098 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3101 /* Adds candidates based on the uses. */
3103 static void
3104 add_iv_candidate_for_uses (struct ivopts_data *data)
3106 unsigned i;
3108 for (i = 0; i < n_iv_uses (data); i++)
3110 struct iv_use *use = iv_use (data, i);
3112 if (!use)
3113 continue;
3115 switch (use->type)
3117 case USE_NONLINEAR_EXPR:
3118 case USE_COMPARE:
3119 case USE_ADDRESS:
3120 /* Just add the ivs based on the value of the iv used here. */
3121 add_iv_candidate_for_use (data, use);
3122 break;
3124 default:
3125 gcc_unreachable ();
3130 /* Record important candidates and add them to related_cands bitmaps
3131 if needed. */
3133 static void
3134 record_important_candidates (struct ivopts_data *data)
3136 unsigned i;
3137 struct iv_use *use;
3139 for (i = 0; i < n_iv_cands (data); i++)
3141 struct iv_cand *cand = iv_cand (data, i);
3143 if (cand->important)
3144 bitmap_set_bit (data->important_candidates, i);
3147 data->consider_all_candidates = (n_iv_cands (data)
3148 <= CONSIDER_ALL_CANDIDATES_BOUND);
3150 if (data->consider_all_candidates)
3152 /* We will not need "related_cands" bitmaps in this case,
3153 so release them to decrease peak memory consumption. */
3154 for (i = 0; i < n_iv_uses (data); i++)
3156 use = iv_use (data, i);
3157 BITMAP_FREE (use->related_cands);
3160 else
3162 /* Add important candidates to the related_cands bitmaps. */
3163 for (i = 0; i < n_iv_uses (data); i++)
3164 bitmap_ior_into (iv_use (data, i)->related_cands,
3165 data->important_candidates);
3169 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3170 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3171 we allocate a small linearly-probed hash table for every use. */
3173 static void
3174 alloc_use_cost_map (struct ivopts_data *data)
3176 unsigned i, size, s;
3178 for (i = 0; i < n_iv_uses (data); i++)
3180 struct iv_use *use = iv_use (data, i);
3182 if (data->consider_all_candidates)
3183 size = n_iv_cands (data);
3184 else
3186 s = bitmap_count_bits (use->related_cands);
3188 /* Round up to a power of two, so that computing the modulo is fast. */
3189 size = s ? (1 << ceil_log2 (s)) : 1;
3192 use->n_map_members = size;
3193 use->cost_map = XCNEWVEC (struct cost_pair, size);
3197 /* Returns description of computation cost of expression whose runtime
3198 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3200 static comp_cost
3201 new_cost (unsigned runtime, unsigned complexity)
3203 comp_cost cost;
3205 cost.cost = runtime;
3206 cost.complexity = complexity;
3208 return cost;
3211 /* Returns true if COST is infinite. */
3213 static bool
3214 infinite_cost_p (comp_cost cost)
3216 return cost.cost == INFTY;
3219 /* Adds costs COST1 and COST2. */
3221 static comp_cost
3222 add_costs (comp_cost cost1, comp_cost cost2)
3224 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3225 return infinite_cost;
3227 cost1.cost += cost2.cost;
3228 cost1.complexity += cost2.complexity;
3230 return cost1;
3232 /* Subtracts cost COST2 from COST1. */
3234 static comp_cost
3235 sub_costs (comp_cost cost1, comp_cost cost2)
3237 cost1.cost -= cost2.cost;
3238 cost1.complexity -= cost2.complexity;
3240 return cost1;
3243 /* Returns a negative number if COST1 < COST2, a positive number if
3244 COST1 > COST2, and 0 if COST1 = COST2. */
3246 static int
3247 compare_costs (comp_cost cost1, comp_cost cost2)
3249 if (cost1.cost == cost2.cost)
3250 return cost1.complexity - cost2.complexity;
3252 return cost1.cost - cost2.cost;
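/* A standalone restatement (not GCC internals) of the ordering above:
   it is an ordinary lexicographic comparison on the (cost, complexity)
   pair.  Runtime cost decides and complexity only breaks ties, so
   (4, 2) < (5, 0) but (4, 1) < (4, 2).  */

struct cost_pair_sketch { int cost; int complexity; };

static int
compare_costs_sketch (struct cost_pair_sketch a, struct cost_pair_sketch b)
{
  if (a.cost == b.cost)
    return a.complexity - b.complexity;   /* tiebreak on complexity */
  return a.cost - b.cost;
}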
3255 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3256 depends on the invariants DEPENDS_ON, that the value used in expressing it
3257 is VALUE, and, in the case of iv elimination, that the comparison operator is COMP. */
3259 static void
3260 set_use_iv_cost (struct ivopts_data *data,
3261 struct iv_use *use, struct iv_cand *cand,
3262 comp_cost cost, bitmap depends_on, tree value,
3263 enum tree_code comp, int inv_expr_id)
3265 unsigned i, s;
3267 if (infinite_cost_p (cost))
3269 BITMAP_FREE (depends_on);
3270 return;
3273 if (data->consider_all_candidates)
3275 use->cost_map[cand->id].cand = cand;
3276 use->cost_map[cand->id].cost = cost;
3277 use->cost_map[cand->id].depends_on = depends_on;
3278 use->cost_map[cand->id].value = value;
3279 use->cost_map[cand->id].comp = comp;
3280 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3281 return;
3284 /* n_map_members is a power of two, so this computes modulo. */
3285 s = cand->id & (use->n_map_members - 1);
3286 for (i = s; i < use->n_map_members; i++)
3287 if (!use->cost_map[i].cand)
3288 goto found;
3289 for (i = 0; i < s; i++)
3290 if (!use->cost_map[i].cand)
3291 goto found;
3293 gcc_unreachable ();
3295 found:
3296 use->cost_map[i].cand = cand;
3297 use->cost_map[i].cost = cost;
3298 use->cost_map[i].depends_on = depends_on;
3299 use->cost_map[i].value = value;
3300 use->cost_map[i].comp = comp;
3301 use->cost_map[i].inv_expr_id = inv_expr_id;
3304 /* Gets cost of (USE, CANDIDATE) pair. */
3306 static struct cost_pair *
3307 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3308 struct iv_cand *cand)
3310 unsigned i, s;
3311 struct cost_pair *ret;
3313 if (!cand)
3314 return NULL;
3316 if (data->consider_all_candidates)
3318 ret = use->cost_map + cand->id;
3319 if (!ret->cand)
3320 return NULL;
3322 return ret;
3325 /* n_map_members is a power of two, so this computes modulo. */
3326 s = cand->id & (use->n_map_members - 1);
3327 for (i = s; i < use->n_map_members; i++)
3328 if (use->cost_map[i].cand == cand)
3329 return use->cost_map + i;
3330 else if (use->cost_map[i].cand == NULL)
3331 return NULL;
3332 for (i = 0; i < s; i++)
3333 if (use->cost_map[i].cand == cand)
3334 return use->cost_map + i;
3335 else if (use->cost_map[i].cand == NULL)
3336 return NULL;
3338 return NULL;
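/* A standalone sketch (not GCC internals) of the open-addressing scheme
   shared by set_use_iv_cost and get_use_iv_cost above.  SIZE is a power
   of two, so ID & (SIZE - 1) is the cheap modulo; probing runs linearly
   to the end of the table and then wraps once.  Slot value 0 stands in
   for "empty" here.  */

static int
find_slot_sketch (const int *table, unsigned size /* power of two */,
                  int id)
{
  unsigned start = (unsigned) id & (size - 1);   /* id % size */
  for (unsigned i = start; i < size; i++)
    if (table[i] == id || table[i] == 0)
      return (int) i;
  for (unsigned i = 0; i < start; i++)
    if (table[i] == id || table[i] == 0)
      return (int) i;
  return -1;   /* full; the insertion path above asserts instead */
}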
3341 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3342 static rtx
3343 produce_memory_decl_rtl (tree obj, int *regno)
3345 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3346 machine_mode address_mode = targetm.addr_space.address_mode (as);
3347 rtx x;
3349 gcc_assert (obj);
3350 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3352 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3353 x = gen_rtx_SYMBOL_REF (address_mode, name);
3354 SET_SYMBOL_REF_DECL (x, obj);
3355 x = gen_rtx_MEM (DECL_MODE (obj), x);
3356 set_mem_addr_space (x, as);
3357 targetm.encode_section_info (obj, x, true);
3359 else
3361 x = gen_raw_REG (address_mode, (*regno)++);
3362 x = gen_rtx_MEM (DECL_MODE (obj), x);
3363 set_mem_addr_space (x, as);
3366 return x;
3369 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3370 walk_tree. DATA contains the actual fake register number. */
3372 static tree
3373 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3375 tree obj = NULL_TREE;
3376 rtx x = NULL_RTX;
3377 int *regno = (int *) data;
3379 switch (TREE_CODE (*expr_p))
3381 case ADDR_EXPR:
3382 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3383 handled_component_p (*expr_p);
3384 expr_p = &TREE_OPERAND (*expr_p, 0))
3385 continue;
3386 obj = *expr_p;
3387 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3388 x = produce_memory_decl_rtl (obj, regno);
3389 break;
3391 case SSA_NAME:
3392 *ws = 0;
3393 obj = SSA_NAME_VAR (*expr_p);
3394 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3395 if (!obj)
3396 return NULL_TREE;
3397 if (!DECL_RTL_SET_P (obj))
3398 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3399 break;
3401 case VAR_DECL:
3402 case PARM_DECL:
3403 case RESULT_DECL:
3404 *ws = 0;
3405 obj = *expr_p;
3407 if (DECL_RTL_SET_P (obj))
3408 break;
3410 if (DECL_MODE (obj) == BLKmode)
3411 x = produce_memory_decl_rtl (obj, regno);
3412 else
3413 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3415 break;
3417 default:
3418 break;
3421 if (x)
3423 decl_rtl_to_reset.safe_push (obj);
3424 SET_DECL_RTL (obj, x);
3427 return NULL_TREE;
3430 /* Determines cost of the computation of EXPR. */
3432 static unsigned
3433 computation_cost (tree expr, bool speed)
3435 rtx_insn *seq;
3436 rtx rslt;
3437 tree type = TREE_TYPE (expr);
3438 unsigned cost;
3439 /* Avoid using hard regs in ways which may be unsupported. */
3440 int regno = LAST_VIRTUAL_REGISTER + 1;
3441 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3442 enum node_frequency real_frequency = node->frequency;
3444 node->frequency = NODE_FREQUENCY_NORMAL;
3445 crtl->maybe_hot_insn_p = speed;
3446 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3447 start_sequence ();
3448 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3449 seq = get_insns ();
3450 end_sequence ();
3451 default_rtl_profile ();
3452 node->frequency = real_frequency;
3454 cost = seq_cost (seq, speed);
3455 if (MEM_P (rslt))
3456 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3457 TYPE_ADDR_SPACE (type), speed);
3458 else if (!REG_P (rslt))
3459 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3461 return cost;
3464 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3466 static tree
3467 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3469 if (stmt_after_increment (loop, cand, stmt))
3470 return cand->var_after;
3471 else
3472 return cand->var_before;
3475 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have
3476 the same precision, which is at least as wide as the precision of TYPE,
3477 this stores BA to A and BB to B, and returns the type of BA. Otherwise,
3478 it returns the type of A and B. */
3480 static tree
3481 determine_common_wider_type (tree *a, tree *b)
3483 tree wider_type = NULL;
3484 tree suba, subb;
3485 tree atype = TREE_TYPE (*a);
3487 if (CONVERT_EXPR_P (*a))
3489 suba = TREE_OPERAND (*a, 0);
3490 wider_type = TREE_TYPE (suba);
3491 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3492 return atype;
3494 else
3495 return atype;
3497 if (CONVERT_EXPR_P (*b))
3499 subb = TREE_OPERAND (*b, 0);
3500 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3501 return atype;
3503 else
3504 return atype;
3506 *a = suba;
3507 *b = subb;
3508 return wider_type;
3511 /* Determines the expression by which USE is expressed from induction variable
3512 CAND at statement AT in LOOP. The expression is stored in a decomposed
3513 form into AFF. Returns false if USE cannot be expressed using CAND. */
3515 static bool
3516 get_computation_aff (struct loop *loop,
3517 struct iv_use *use, struct iv_cand *cand, gimple *at,
3518 struct aff_tree *aff)
3520 tree ubase = use->iv->base;
3521 tree ustep = use->iv->step;
3522 tree cbase = cand->iv->base;
3523 tree cstep = cand->iv->step, cstep_common;
3524 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3525 tree common_type, var;
3526 tree uutype;
3527 aff_tree cbase_aff, var_aff;
3528 widest_int rat;
3530 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3532 /* We do not have enough precision to express the values of the use. */
3533 return false;
3536 var = var_at_stmt (loop, cand, at);
3537 uutype = unsigned_type_for (utype);
3539 /* If the conversion is not noop, perform it. */
3540 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3542 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3543 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3545 tree inner_base, inner_step, inner_type;
3546 inner_base = TREE_OPERAND (cbase, 0);
3547 if (CONVERT_EXPR_P (cstep))
3548 inner_step = TREE_OPERAND (cstep, 0);
3549 else
3550 inner_step = cstep;
3552 inner_type = TREE_TYPE (inner_base);
3553 /* If the candidate was added from a biv whose type is smaller than
3554 ctype, we know that neither the candidate nor the biv overflows.
3555 In this case, it's safe to skip the conversion in the candidate.
3556 As an example, (unsigned short)((unsigned long)A) equals
3557 (unsigned short)A, if A has a type no larger than short. */
3558 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3560 cbase = inner_base;
3561 cstep = inner_step;
3564 cstep = fold_convert (uutype, cstep);
3565 cbase = fold_convert (uutype, cbase);
3566 var = fold_convert (uutype, var);
3569 if (!constant_multiple_of (ustep, cstep, &rat))
3570 return false;
3572 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3573 type, we achieve better folding by computing their difference in this
3574 wider type, and casting the result to UUTYPE. We do not need to worry about
3575 overflows, as all the arithmetic will in the end be performed in UUTYPE
3576 anyway. */
3577 common_type = determine_common_wider_type (&ubase, &cbase);
3579 /* use = ubase - ratio * cbase + ratio * var. */
3580 tree_to_aff_combination (ubase, common_type, aff);
3581 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3582 tree_to_aff_combination (var, uutype, &var_aff);
3584 /* We need to shift the value if we are after the increment. */
3585 if (stmt_after_increment (loop, cand, at))
3587 aff_tree cstep_aff;
3589 if (common_type != uutype)
3590 cstep_common = fold_convert (common_type, cstep);
3591 else
3592 cstep_common = cstep;
3594 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3595 aff_combination_add (&cbase_aff, &cstep_aff);
3598 aff_combination_scale (&cbase_aff, -rat);
3599 aff_combination_add (aff, &cbase_aff);
3600 if (common_type != uutype)
3601 aff_combination_convert (aff, uutype);
3603 aff_combination_scale (&var_aff, rat);
3604 aff_combination_add (aff, &var_aff);
3606 return true;
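/* A standalone numeric check (not GCC internals) of the identity behind
   "use = ubase - ratio * cbase + ratio * var" above.  With use iv
   u(i) = ubase + i * ustep, candidate iv c(i) = cbase + i * cstep and
   ustep = ratio * cstep, substituting var = c(i) reproduces u(i)
   exactly.  */

#include <assert.h>

static void
check_affine_identity (long ubase, long cbase, long cstep,
                       long ratio, long i)
{
  long ustep = ratio * cstep;
  long var = cbase + i * cstep;                /* candidate value */
  long recomputed = ubase - ratio * cbase + ratio * var;
  assert (recomputed == ubase + i * ustep);    /* the use's own value */
}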
3609 /* Return the type of USE. */
3611 static tree
3612 get_use_type (struct iv_use *use)
3614 tree base_type = TREE_TYPE (use->iv->base);
3615 tree type;
3617 if (use->type == USE_ADDRESS)
3619 /* The base_type may be a void pointer. Create a pointer type based on
3620 the mem_ref instead. */
3621 type = build_pointer_type (TREE_TYPE (*use->op_p));
3622 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3623 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3625 else
3626 type = base_type;
3628 return type;
3631 /* Determines the expression by which USE is expressed from induction variable
3632 CAND at statement AT in LOOP. The computation is unshared. */
3634 static tree
3635 get_computation_at (struct loop *loop,
3636 struct iv_use *use, struct iv_cand *cand, gimple *at)
3638 aff_tree aff;
3639 tree type = get_use_type (use);
3641 if (!get_computation_aff (loop, use, cand, at, &aff))
3642 return NULL_TREE;
3643 unshare_aff_combination (&aff);
3644 return fold_convert (type, aff_combination_to_tree (&aff));
3647 /* Determines the expression by which USE is expressed from induction variable
3648 CAND in LOOP. The computation is unshared. */
3650 static tree
3651 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3653 return get_computation_at (loop, use, cand, use->stmt);
3656 /* Adjust the cost COST for being in loop setup rather than loop body.
3657 If we're optimizing for space, the loop setup overhead is constant;
3658 if we're optimizing for speed, amortize it over the per-iteration cost. */
3659 static unsigned
3660 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3662 if (cost == INFTY)
3663 return cost;
3664 else if (optimize_loop_for_speed_p (data->current_loop))
3665 return cost / avg_loop_niter (data->current_loop);
3666 else
3667 return cost;
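/* The effect of the amortization above in numbers, as a standalone
   sketch (not GCC internals): with an average trip count of 10, a setup
   cost of 30 contributes only 3 units per iteration when optimizing for
   speed, while for size the full 30 is charged, since the setup
   instructions exist exactly once regardless of the trip count.  */

static unsigned
adjust_setup_cost_sketch (unsigned cost, unsigned avg_niter, int speed_p)
{
  return speed_p ? cost / avg_niter : cost;
}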
3670 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3671 validity for a memory reference accessing memory of mode MODE in
3672 address space AS. */
3675 bool
3676 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3677 addr_space_t as)
3679 #define MAX_RATIO 128
3680 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3681 static vec<sbitmap> valid_mult_list;
3682 sbitmap valid_mult;
3684 if (data_index >= valid_mult_list.length ())
3685 valid_mult_list.safe_grow_cleared (data_index + 1);
3687 valid_mult = valid_mult_list[data_index];
3688 if (!valid_mult)
3690 machine_mode address_mode = targetm.addr_space.address_mode (as);
3691 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3692 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3693 rtx addr, scaled;
3694 HOST_WIDE_INT i;
3696 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3697 bitmap_clear (valid_mult);
3698 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3699 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3700 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3702 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3703 if (memory_address_addr_space_p (mode, addr, as)
3704 || memory_address_addr_space_p (mode, scaled, as))
3705 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3708 if (dump_file && (dump_flags & TDF_DETAILS))
3710 fprintf (dump_file, " allowed multipliers:");
3711 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3712 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3713 fprintf (dump_file, " %d", (int) i);
3714 fprintf (dump_file, "\n");
3715 fprintf (dump_file, "\n");
3718 valid_mult_list[data_index] = valid_mult;
3721 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3722 return false;
3724 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
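/* A standalone sketch (not GCC internals) of the cache above.  Valid
   ratios lie in [-MAX_RATIO, MAX_RATIO], so ratio R is stored at index
   R + MAX_RATIO of a per-(mode, address space) bit array, and later
   queries reduce to a range check plus one bit test.  The array here is
   a plain bool table standing in for the sbitmap.  */

#include <stdbool.h>

#define MAX_RATIO_SKETCH 128

static bool valid_ratio[2 * MAX_RATIO_SKETCH + 1];

static bool
ratio_allowed_sketch (int ratio)
{
  if (ratio < -MAX_RATIO_SKETCH || ratio > MAX_RATIO_SKETCH)
    return false;
  return valid_ratio[ratio + MAX_RATIO_SKETCH];
}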
3727 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3728 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3729 variable is omitted. Compute the cost for a memory reference that accesses
3730 a memory location of mode MEM_MODE in address space AS.
3732 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3733 size of MEM_MODE / RATIO) is available. To make this determination, we
3734 look at the size of the increment to be made, which is given in CSTEP.
3735 CSTEP may be zero if the step is unknown.
3736 STMT_AFTER_INC is true iff the statement we're looking at is after the
3737 increment of the original biv.
3739 TODO -- there must be some better way. This all is quite crude. */
3741 enum ainc_type
3743 AINC_PRE_INC, /* Pre increment. */
3744 AINC_PRE_DEC, /* Pre decrement. */
3745 AINC_POST_INC, /* Post increment. */
3746 AINC_POST_DEC, /* Post decrement. */
3747 AINC_NONE /* Also the number of auto increment types. */
3750 typedef struct address_cost_data_s
3752 HOST_WIDE_INT min_offset, max_offset;
3753 unsigned costs[2][2][2][2];
3754 unsigned ainc_costs[AINC_NONE];
3755 } *address_cost_data;
3758 static comp_cost
3759 get_address_cost (bool symbol_present, bool var_present,
3760 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3761 HOST_WIDE_INT cstep, machine_mode mem_mode,
3762 addr_space_t as, bool speed,
3763 bool stmt_after_inc, bool *may_autoinc)
3765 machine_mode address_mode = targetm.addr_space.address_mode (as);
3766 static vec<address_cost_data> address_cost_data_list;
3767 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3768 address_cost_data data;
3769 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3770 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3771 unsigned cost, acost, complexity;
3772 enum ainc_type autoinc_type;
3773 bool offset_p, ratio_p, autoinc;
3774 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3775 unsigned HOST_WIDE_INT mask;
3776 unsigned bits;
3778 if (data_index >= address_cost_data_list.length ())
3779 address_cost_data_list.safe_grow_cleared (data_index + 1);
3781 data = address_cost_data_list[data_index];
3782 if (!data)
3784 HOST_WIDE_INT i;
3785 HOST_WIDE_INT rat, off = 0;
3786 int old_cse_not_expected, width;
3787 unsigned sym_p, var_p, off_p, rat_p, add_c;
3788 rtx_insn *seq;
3789 rtx addr, base;
3790 rtx reg0, reg1;
3792 data = (address_cost_data) xcalloc (1, sizeof (*data));
3794 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3796 width = GET_MODE_BITSIZE (address_mode) - 1;
3797 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3798 width = HOST_BITS_PER_WIDE_INT - 1;
3799 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3801 for (i = width; i >= 0; i--)
3803 off = -((unsigned HOST_WIDE_INT) 1 << i);
3804 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3805 if (memory_address_addr_space_p (mem_mode, addr, as))
3806 break;
3808 data->min_offset = (i == -1? 0 : off);
3810 for (i = width; i >= 0; i--)
3812 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3813 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3814 if (memory_address_addr_space_p (mem_mode, addr, as))
3815 break;
3816 /* For some strict-alignment targets, the offset must be naturally
3817 aligned. Try an aligned offset if mem_mode is not QImode. */
3818 off = mem_mode != QImode
3819 ? ((unsigned HOST_WIDE_INT) 1 << i)
3820 - GET_MODE_SIZE (mem_mode)
3821 : 0;
3822 if (off > 0)
3824 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3825 if (memory_address_addr_space_p (mem_mode, addr, as))
3826 break;
3829 if (i == -1)
3830 off = 0;
3831 data->max_offset = off;
3833 if (dump_file && (dump_flags & TDF_DETAILS))
3835 fprintf (dump_file, "get_address_cost:\n");
3836 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3837 GET_MODE_NAME (mem_mode),
3838 data->min_offset);
3839 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3840 GET_MODE_NAME (mem_mode),
3841 data->max_offset);
3844 rat = 1;
3845 for (i = 2; i <= MAX_RATIO; i++)
3846 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3848 rat = i;
3849 break;
3852 /* Compute the cost of various addressing modes. */
3853 acost = 0;
3854 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3855 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3857 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3858 || USE_STORE_PRE_DECREMENT (mem_mode))
3860 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3861 has_predec[mem_mode]
3862 = memory_address_addr_space_p (mem_mode, addr, as);
3864 if (has_predec[mem_mode])
3865 data->ainc_costs[AINC_PRE_DEC]
3866 = address_cost (addr, mem_mode, as, speed);
3868 if (USE_LOAD_POST_DECREMENT (mem_mode)
3869 || USE_STORE_POST_DECREMENT (mem_mode))
3871 addr = gen_rtx_POST_DEC (address_mode, reg0);
3872 has_postdec[mem_mode]
3873 = memory_address_addr_space_p (mem_mode, addr, as);
3875 if (has_postdec[mem_mode])
3876 data->ainc_costs[AINC_POST_DEC]
3877 = address_cost (addr, mem_mode, as, speed);
3879 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3880 || USE_STORE_PRE_INCREMENT (mem_mode))
3882 addr = gen_rtx_PRE_INC (address_mode, reg0);
3883 has_preinc[mem_mode]
3884 = memory_address_addr_space_p (mem_mode, addr, as);
3886 if (has_preinc[mem_mode])
3887 data->ainc_costs[AINC_PRE_INC]
3888 = address_cost (addr, mem_mode, as, speed);
3890 if (USE_LOAD_POST_INCREMENT (mem_mode)
3891 || USE_STORE_POST_INCREMENT (mem_mode))
3893 addr = gen_rtx_POST_INC (address_mode, reg0);
3894 has_postinc[mem_mode]
3895 = memory_address_addr_space_p (mem_mode, addr, as);
3897 if (has_postinc[mem_mode])
3898 data->ainc_costs[AINC_POST_INC]
3899 = address_cost (addr, mem_mode, as, speed);
3901 for (i = 0; i < 16; i++)
3903 sym_p = i & 1;
3904 var_p = (i >> 1) & 1;
3905 off_p = (i >> 2) & 1;
3906 rat_p = (i >> 3) & 1;
3908 addr = reg0;
3909 if (rat_p)
3910 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3911 gen_int_mode (rat, address_mode));
3913 if (var_p)
3914 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3916 if (sym_p)
3918 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3919 /* ??? We can run into trouble with some backends by presenting
3920 it with symbols which haven't been properly passed through
3921 targetm.encode_section_info. By setting the local bit, we
3922 enhance the probability of things working. */
3923 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3925 if (off_p)
3926 base = gen_rtx_fmt_e (CONST, address_mode,
3927 gen_rtx_fmt_ee
3928 (PLUS, address_mode, base,
3929 gen_int_mode (off, address_mode)));
3931 else if (off_p)
3932 base = gen_int_mode (off, address_mode);
3933 else
3934 base = NULL_RTX;
3936 if (base)
3937 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3939 start_sequence ();
3940 /* To avoid splitting addressing modes, pretend that no cse will
3941 follow. */
3942 old_cse_not_expected = cse_not_expected;
3943 cse_not_expected = true;
3944 addr = memory_address_addr_space (mem_mode, addr, as);
3945 cse_not_expected = old_cse_not_expected;
3946 seq = get_insns ();
3947 end_sequence ();
3949 acost = seq_cost (seq, speed);
3950 acost += address_cost (addr, mem_mode, as, speed);
3952 if (!acost)
3953 acost = 1;
3954 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3957 /* On some targets, it is quite expensive to load symbol to a register,
3958 which makes addresses that contain symbols look much more expensive.
3959 However, the symbol will have to be loaded in any case before the
3960 loop (and quite likely we have it in register already), so it does not
3961 make much sense to penalize them too heavily. So make some final
3962 tweaks for the SYMBOL_PRESENT modes:
3964 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3965 var is cheaper, use this mode with small penalty.
3966 If VAR_PRESENT is true, try whether the mode with
3967 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3968 if this is the case, use it. */
3969 add_c = add_cost (speed, address_mode);
3970 for (i = 0; i < 8; i++)
3972 var_p = i & 1;
3973 off_p = (i >> 1) & 1;
3974 rat_p = (i >> 2) & 1;
3976 acost = data->costs[0][1][off_p][rat_p] + 1;
3977 if (var_p)
3978 acost += add_c;
3980 if (acost < data->costs[1][var_p][off_p][rat_p])
3981 data->costs[1][var_p][off_p][rat_p] = acost;
3984 if (dump_file && (dump_flags & TDF_DETAILS))
3986 fprintf (dump_file, "Address costs:\n");
3988 for (i = 0; i < 16; i++)
3990 sym_p = i & 1;
3991 var_p = (i >> 1) & 1;
3992 off_p = (i >> 2) & 1;
3993 rat_p = (i >> 3) & 1;
3995 fprintf (dump_file, " ");
3996 if (sym_p)
3997 fprintf (dump_file, "sym + ");
3998 if (var_p)
3999 fprintf (dump_file, "var + ");
4000 if (off_p)
4001 fprintf (dump_file, "cst + ");
4002 if (rat_p)
4003 fprintf (dump_file, "rat * ");
4005 acost = data->costs[sym_p][var_p][off_p][rat_p];
4006 fprintf (dump_file, "index costs %d\n", acost);
4008 if (has_predec[mem_mode] || has_postdec[mem_mode]
4009 || has_preinc[mem_mode] || has_postinc[mem_mode])
4010 fprintf (dump_file, " May include autoinc/dec\n");
4011 fprintf (dump_file, "\n");
4014 address_cost_data_list[data_index] = data;
4017 bits = GET_MODE_BITSIZE (address_mode);
4018 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4019 offset &= mask;
4020 if ((offset >> (bits - 1) & 1))
4021 offset |= ~mask;
4022 s_offset = offset;
4024 autoinc = false;
4025 autoinc_type = AINC_NONE;
4026 msize = GET_MODE_SIZE (mem_mode);
4027 autoinc_offset = offset;
4028 if (stmt_after_inc)
4029 autoinc_offset += ratio * cstep;
4030 if (symbol_present || var_present || ratio != 1)
4031 autoinc = false;
4032 else
4034 if (has_postinc[mem_mode] && autoinc_offset == 0
4035 && msize == cstep)
4036 autoinc_type = AINC_POST_INC;
4037 else if (has_postdec[mem_mode] && autoinc_offset == 0
4038 && msize == -cstep)
4039 autoinc_type = AINC_POST_DEC;
4040 else if (has_preinc[mem_mode] && autoinc_offset == msize
4041 && msize == cstep)
4042 autoinc_type = AINC_PRE_INC;
4043 else if (has_predec[mem_mode] && autoinc_offset == -msize
4044 && msize == -cstep)
4045 autoinc_type = AINC_PRE_DEC;
4047 if (autoinc_type != AINC_NONE)
4048 autoinc = true;
4051 cost = 0;
4052 offset_p = (s_offset != 0
4053 && data->min_offset <= s_offset
4054 && s_offset <= data->max_offset);
4055 ratio_p = (ratio != 1
4056 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4058 if (ratio != 1 && !ratio_p)
4059 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4061 if (s_offset && !offset_p && !symbol_present)
4062 cost += add_cost (speed, address_mode);
4064 if (may_autoinc)
4065 *may_autoinc = autoinc;
4066 if (autoinc)
4067 acost = data->ainc_costs[autoinc_type];
4068 else
4069 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4070 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4071 return new_cost (cost + acost, complexity);
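/* The sign-extension idiom used near the top of the function above,
   isolated as a standalone sketch (not GCC internals).  The offset is
   masked down to the BITS-wide address width; if its sign bit is then
   set, the high host bits are filled back in so host arithmetic sees
   the same negative value the target would.  The double shift when
   building MASK avoids shifting by the full word width, which would be
   undefined.  */

static long
sign_extend_sketch (unsigned long x, int bits)
{
  unsigned long mask = ~(~0UL << (bits - 1) << 1);  /* low BITS bits set */
  x &= mask;
  if ((x >> (bits - 1)) & 1)
    x |= ~mask;                                     /* propagate sign */
  return (long) x;
}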
4074 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
4075 the EXPR operand holding the multiplication implemented by the shift.
4076 COST0 and COST1 are the costs for calculating the operands of EXPR.
4077 Returns true if successful, and returns the cost in COST. */
4079 static bool
4080 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4081 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4083 comp_cost res;
4084 tree op1 = TREE_OPERAND (expr, 1);
4085 tree cst = TREE_OPERAND (mult, 1);
4086 tree multop = TREE_OPERAND (mult, 0);
4087 int m = exact_log2 (int_cst_value (cst));
4088 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4089 int as_cost, sa_cost;
4090 bool mult_in_op1;
4092 if (!(m >= 0 && m < maxm))
4093 return false;
4095 STRIP_NOPS (op1);
4096 mult_in_op1 = operand_equal_p (op1, mult, 0);
4098 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4100 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4101 use that in preference to a shift insn followed by an add insn. */
4102 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4103 ? shiftadd_cost (speed, mode, m)
4104 : (mult_in_op1
4105 ? shiftsub1_cost (speed, mode, m)
4106 : shiftsub0_cost (speed, mode, m)));
4108 res = new_cost (MIN (as_cost, sa_cost), 0);
4109 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4111 STRIP_NOPS (multop);
4112 if (!is_gimple_val (multop))
4113 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4115 *cost = res;
4116 return true;
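/* A standalone illustration (not GCC internals) of the rewrite whose
   cost is estimated above.  When the multiplier is a power of two,
   A + B * 8 can be emitted as one shift-and-add, A + (B << 3), and
   A - B * 8 as the matching shift-and-subtract; M is exact_log2 of the
   constant.  */

static long
shiftadd_sketch (long a, long b, int m, int minus_p)
{
  return minus_p ? a - (b << m) : a + (b << m);
}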
4119 /* Estimates cost of forcing expression EXPR into a variable. */
4121 static comp_cost
4122 force_expr_to_var_cost (tree expr, bool speed)
4124 static bool costs_initialized = false;
4125 static unsigned integer_cost [2];
4126 static unsigned symbol_cost [2];
4127 static unsigned address_cost [2];
4128 tree op0, op1;
4129 comp_cost cost0, cost1, cost;
4130 machine_mode mode;
4132 if (!costs_initialized)
4134 tree type = build_pointer_type (integer_type_node);
4135 tree var, addr;
4136 rtx x;
4137 int i;
4139 var = create_tmp_var_raw (integer_type_node, "test_var");
4140 TREE_STATIC (var) = 1;
4141 x = produce_memory_decl_rtl (var, NULL);
4142 SET_DECL_RTL (var, x);
4144 addr = build1 (ADDR_EXPR, type, var);
4147 for (i = 0; i < 2; i++)
4149 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4150 2000), i);
4152 symbol_cost[i] = computation_cost (addr, i) + 1;
4154 address_cost[i]
4155 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4156 if (dump_file && (dump_flags & TDF_DETAILS))
4158 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4159 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4160 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4161 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4162 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4163 fprintf (dump_file, "\n");
4167 costs_initialized = true;
4170 STRIP_NOPS (expr);
4172 if (SSA_VAR_P (expr))
4173 return no_cost;
4175 if (is_gimple_min_invariant (expr))
4177 if (TREE_CODE (expr) == INTEGER_CST)
4178 return new_cost (integer_cost [speed], 0);
4180 if (TREE_CODE (expr) == ADDR_EXPR)
4182 tree obj = TREE_OPERAND (expr, 0);
4184 if (TREE_CODE (obj) == VAR_DECL
4185 || TREE_CODE (obj) == PARM_DECL
4186 || TREE_CODE (obj) == RESULT_DECL)
4187 return new_cost (symbol_cost [speed], 0);
4190 return new_cost (address_cost [speed], 0);
4193 switch (TREE_CODE (expr))
4195 case POINTER_PLUS_EXPR:
4196 case PLUS_EXPR:
4197 case MINUS_EXPR:
4198 case MULT_EXPR:
4199 op0 = TREE_OPERAND (expr, 0);
4200 op1 = TREE_OPERAND (expr, 1);
4201 STRIP_NOPS (op0);
4202 STRIP_NOPS (op1);
4203 break;
4205 CASE_CONVERT:
4206 case NEGATE_EXPR:
4207 op0 = TREE_OPERAND (expr, 0);
4208 STRIP_NOPS (op0);
4209 op1 = NULL_TREE;
4210 break;
4212 default:
4213 /* Just an arbitrary value, FIXME. */
4214 return new_cost (target_spill_cost[speed], 0);
4217 if (op0 == NULL_TREE
4218 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4219 cost0 = no_cost;
4220 else
4221 cost0 = force_expr_to_var_cost (op0, speed);
4223 if (op1 == NULL_TREE
4224 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4225 cost1 = no_cost;
4226 else
4227 cost1 = force_expr_to_var_cost (op1, speed);
4229 mode = TYPE_MODE (TREE_TYPE (expr));
4230 switch (TREE_CODE (expr))
4232 case POINTER_PLUS_EXPR:
4233 case PLUS_EXPR:
4234 case MINUS_EXPR:
4235 case NEGATE_EXPR:
4236 cost = new_cost (add_cost (speed, mode), 0);
4237 if (TREE_CODE (expr) != NEGATE_EXPR)
4239 tree mult = NULL_TREE;
4240 comp_cost sa_cost;
4241 if (TREE_CODE (op1) == MULT_EXPR)
4242 mult = op1;
4243 else if (TREE_CODE (op0) == MULT_EXPR)
4244 mult = op0;
4246 if (mult != NULL_TREE
4247 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4248 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4249 speed, &sa_cost))
4250 return sa_cost;
4252 break;
4254 CASE_CONVERT:
4256 tree inner_mode, outer_mode;
4257 outer_mode = TREE_TYPE (expr);
4258 inner_mode = TREE_TYPE (op0);
4259 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4260 TYPE_MODE (inner_mode), speed), 0);
4262 break;
4264 case MULT_EXPR:
4265 if (cst_and_fits_in_hwi (op0))
4266 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4267 mode, speed), 0);
4268 else if (cst_and_fits_in_hwi (op1))
4269 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4270 mode, speed), 0);
4271 else
4272 return new_cost (target_spill_cost [speed], 0);
4273 break;
4275 default:
4276 gcc_unreachable ();
4279 cost = add_costs (cost, cost0);
4280 cost = add_costs (cost, cost1);
4282 /* Bound the cost by target_spill_cost. The parts of complicated
4283 computations often are either loop invariant or at least can
4284 be shared between several iv uses, so letting this grow without
4285 limits would not give reasonable results. */
4286 if (cost.cost > (int) target_spill_cost [speed])
4287 cost.cost = target_spill_cost [speed];
4289 return cost;
4292 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4293 invariants the computation depends on. */
4295 static comp_cost
4296 force_var_cost (struct ivopts_data *data,
4297 tree expr, bitmap *depends_on)
4299 if (depends_on)
4301 fd_ivopts_data = data;
4302 walk_tree (&expr, find_depends, depends_on, NULL);
4305 return force_expr_to_var_cost (expr, data->speed);
4308 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4309 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4310 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4311 invariants the computation depends on. */
4313 static comp_cost
4314 split_address_cost (struct ivopts_data *data,
4315 tree addr, bool *symbol_present, bool *var_present,
4316 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4318 tree core;
4319 HOST_WIDE_INT bitsize;
4320 HOST_WIDE_INT bitpos;
4321 tree toffset;
4322 machine_mode mode;
4323 int unsignedp, volatilep;
4325 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4326 &unsignedp, &volatilep, false);
4328 if (toffset != 0
4329 || bitpos % BITS_PER_UNIT != 0
4330 || TREE_CODE (core) != VAR_DECL)
4332 *symbol_present = false;
4333 *var_present = true;
4334 fd_ivopts_data = data;
4335 walk_tree (&addr, find_depends, depends_on, NULL);
4336 return new_cost (target_spill_cost[data->speed], 0);
4339 *offset += bitpos / BITS_PER_UNIT;
4340 if (TREE_STATIC (core)
4341 || DECL_EXTERNAL (core))
4343 *symbol_present = true;
4344 *var_present = false;
4345 return no_cost;
4348 *symbol_present = false;
4349 *var_present = true;
4350 return no_cost;
4353 /* Estimates cost of expressing difference of addresses E1 - E2 as
4354 var + symbol + offset. The value of offset is added to OFFSET,
4355 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4356 part is missing. DEPENDS_ON is a set of the invariants the computation
4357 depends on. */
4359 static comp_cost
4360 ptr_difference_cost (struct ivopts_data *data,
4361 tree e1, tree e2, bool *symbol_present, bool *var_present,
4362 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4364 HOST_WIDE_INT diff = 0;
4365 aff_tree aff_e1, aff_e2;
4366 tree type;
4368 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4370 if (ptr_difference_const (e1, e2, &diff))
4372 *offset += diff;
4373 *symbol_present = false;
4374 *var_present = false;
4375 return no_cost;
4378 if (integer_zerop (e2))
4379 return split_address_cost (data, TREE_OPERAND (e1, 0),
4380 symbol_present, var_present, offset, depends_on);
4382 *symbol_present = false;
4383 *var_present = true;
4385 type = signed_type_for (TREE_TYPE (e1));
4386 tree_to_aff_combination (e1, type, &aff_e1);
4387 tree_to_aff_combination (e2, type, &aff_e2);
4388 aff_combination_scale (&aff_e2, -1);
4389 aff_combination_add (&aff_e1, &aff_e2);
4391 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4394 /* Estimates cost of expressing difference E1 - E2 as
4395 var + symbol + offset. The value of offset is added to OFFSET,
4396 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4397 part is missing. DEPENDS_ON is a set of the invariants the computation
4398 depends on. */
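/* For example, for E1 = p + 32 and E2 = p + 8 the constant offsets are
   stripped, OFFSET is increased by 24, and because the remaining bases
   are equal no variable part is left, so the difference costs nothing
   at runtime.  */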
4400 static comp_cost
4401 difference_cost (struct ivopts_data *data,
4402 tree e1, tree e2, bool *symbol_present, bool *var_present,
4403 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4405 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4406 unsigned HOST_WIDE_INT off1, off2;
4407 aff_tree aff_e1, aff_e2;
4408 tree type;
4410 e1 = strip_offset (e1, &off1);
4411 e2 = strip_offset (e2, &off2);
4412 *offset += off1 - off2;
4414 STRIP_NOPS (e1);
4415 STRIP_NOPS (e2);
4417 if (TREE_CODE (e1) == ADDR_EXPR)
4418 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4419 offset, depends_on);
4420 *symbol_present = false;
4422 if (operand_equal_p (e1, e2, 0))
4424 *var_present = false;
4425 return no_cost;
4428 *var_present = true;
4430 if (integer_zerop (e2))
4431 return force_var_cost (data, e1, depends_on);
4433 if (integer_zerop (e1))
4435 comp_cost cost = force_var_cost (data, e2, depends_on);
4436 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4437 return cost;
4440 type = signed_type_for (TREE_TYPE (e1));
4441 tree_to_aff_combination (e1, type, &aff_e1);
4442 tree_to_aff_combination (e2, type, &aff_e2);
4443 aff_combination_scale (&aff_e2, -1);
4444 aff_combination_add (&aff_e1, &aff_e2);
4446 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4449 /* Returns true if AFF1 and AFF2 are identical. */
4451 static bool
4452 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4454 unsigned i;
4456 if (aff1->n != aff2->n)
4457 return false;
4459 for (i = 0; i < aff1->n; i++)
4461 if (aff1->elts[i].coef != aff2->elts[i].coef)
4462 return false;
4464 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4465 return false;
4467 return true;
4470 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4472 static int
4473 get_expr_id (struct ivopts_data *data, tree expr)
4475 struct iv_inv_expr_ent ent;
4476 struct iv_inv_expr_ent **slot;
4478 ent.expr = expr;
4479 ent.hash = iterative_hash_expr (expr, 0);
4480 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4481 if (*slot)
4482 return (*slot)->id;
4484 *slot = XNEW (struct iv_inv_expr_ent);
4485 (*slot)->expr = expr;
4486 (*slot)->hash = ent.hash;
4487 (*slot)->id = data->inv_expr_id++;
4488 return (*slot)->id;
4491 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4492 requires a new compiler generated temporary. Returns -1 otherwise.
4493 ADDRESS_P is a flag indicating if the expression is for address
4494 computation. */
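/* For instance, with RATIO == 1, UBASE == p + 4 and CBASE == p match
   once the constant part is stripped, so -1 is returned; with UBASE == q
   and CBASE == p the difference q - p is a loop invariant, and an id is
   assigned to it so that its setup cost is counted only once.  */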
4496 static int
4497 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4498 tree cbase, HOST_WIDE_INT ratio,
4499 bool address_p)
4501 aff_tree ubase_aff, cbase_aff;
4502 tree expr, ub, cb;
4504 STRIP_NOPS (ubase);
4505 STRIP_NOPS (cbase);
4506 ub = ubase;
4507 cb = cbase;
4509 if ((TREE_CODE (ubase) == INTEGER_CST)
4510 && (TREE_CODE (cbase) == INTEGER_CST))
4511 return -1;
4513 /* Strips the constant part. */
4514 if (TREE_CODE (ubase) == PLUS_EXPR
4515 || TREE_CODE (ubase) == MINUS_EXPR
4516 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4518 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4519 ubase = TREE_OPERAND (ubase, 0);
4522 /* Strips the constant part. */
4523 if (TREE_CODE (cbase) == PLUS_EXPR
4524 || TREE_CODE (cbase) == MINUS_EXPR
4525 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4527 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4528 cbase = TREE_OPERAND (cbase, 0);
4531 if (address_p)
4533 if (((TREE_CODE (ubase) == SSA_NAME)
4534 || (TREE_CODE (ubase) == ADDR_EXPR
4535 && is_gimple_min_invariant (ubase)))
4536 && (TREE_CODE (cbase) == INTEGER_CST))
4537 return -1;
4539 if (((TREE_CODE (cbase) == SSA_NAME)
4540 || (TREE_CODE (cbase) == ADDR_EXPR
4541 && is_gimple_min_invariant (cbase)))
4542 && (TREE_CODE (ubase) == INTEGER_CST))
4543 return -1;
4546 if (ratio == 1)
4548 if (operand_equal_p (ubase, cbase, 0))
4549 return -1;
4551 if (TREE_CODE (ubase) == ADDR_EXPR
4552 && TREE_CODE (cbase) == ADDR_EXPR)
4554 tree usym, csym;
4556 usym = TREE_OPERAND (ubase, 0);
4557 csym = TREE_OPERAND (cbase, 0);
4558 if (TREE_CODE (usym) == ARRAY_REF)
4560 tree ind = TREE_OPERAND (usym, 1);
4561 if (TREE_CODE (ind) == INTEGER_CST
4562 && tree_fits_shwi_p (ind)
4563 && tree_to_shwi (ind) == 0)
4564 usym = TREE_OPERAND (usym, 0);
4566 if (TREE_CODE (csym) == ARRAY_REF)
4568 tree ind = TREE_OPERAND (csym, 1);
4569 if (TREE_CODE (ind) == INTEGER_CST
4570 && tree_fits_shwi_p (ind)
4571 && tree_to_shwi (ind) == 0)
4572 csym = TREE_OPERAND (csym, 0);
4574 if (operand_equal_p (usym, csym, 0))
4575 return -1;
4577 /* Now do a more complex comparison.  */
4578 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4579 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4580 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4581 return -1;
4584 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4585 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4587 aff_combination_scale (&cbase_aff, -1 * ratio);
4588 aff_combination_add (&ubase_aff, &cbase_aff);
4589 expr = aff_combination_to_tree (&ubase_aff);
4590 return get_expr_id (data, expr);
4595 /* Determines the cost of the computation by which USE is expressed
4596 from induction variable CAND.  If ADDRESS_P is true, we just need
4597 to create an address from it, otherwise we want to get it into a
4598 register.  A set of invariants we depend on is stored in
4599 DEPENDS_ON.  AT is the statement at which the value is computed.
4600 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4601 addressing is likely. */
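/* As a sketch: a use with base ub and step 8 expressed from a candidate
   with base cb and step 4 gets RATIO == 2, so its value is
   ub - 2 * cb + 2 * cand.  The invariant part ub - 2 * cb is costed once
   per loop (divided by the average iteration count), while the multiply
   and add involving the candidate are paid on every iteration.  */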
4603 static comp_cost
4604 get_computation_cost_at (struct ivopts_data *data,
4605 struct iv_use *use, struct iv_cand *cand,
4606 bool address_p, bitmap *depends_on, gimple *at,
4607 bool *can_autoinc,
4608 int *inv_expr_id)
4610 tree ubase = use->iv->base, ustep = use->iv->step;
4611 tree cbase, cstep;
4612 tree utype = TREE_TYPE (ubase), ctype;
4613 unsigned HOST_WIDE_INT cstepi, offset = 0;
4614 HOST_WIDE_INT ratio, aratio;
4615 bool var_present, symbol_present, stmt_is_after_inc;
4616 comp_cost cost;
4617 widest_int rat;
4618 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4619 machine_mode mem_mode = (address_p
4620 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4621 : VOIDmode);
4623 *depends_on = NULL;
4625 /* Only consider real candidates. */
4626 if (!cand->iv)
4627 return infinite_cost;
4629 cbase = cand->iv->base;
4630 cstep = cand->iv->step;
4631 ctype = TREE_TYPE (cbase);
4633 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4635 /* We do not have enough precision to express the values of USE.  */
4636 return infinite_cost;
4639 if (address_p
4640 || (use->iv->base_object
4641 && cand->iv->base_object
4642 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4643 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4645 /* Do not try to express address of an object with computation based
4646 on address of a different object. This may cause problems in rtl
4647 level alias analysis (that does not expect this to be happening,
4648 as this is illegal in C), and would be unlikely to be useful
4649 anyway. */
4650 if (use->iv->base_object
4651 && cand->iv->base_object
4652 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4653 return infinite_cost;
4656 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4658 /* TODO -- add direct handling of this case. */
4659 goto fallback;
4662 /* CSTEPI is removed from the offset in case the statement is after the
4663 increment.  If the step is not constant, we use zero instead.
4664 This is a bit imprecise (there is the extra addition), but
4665 redundancy elimination is likely to transform the code so that
4666 it uses the value of the variable before the increment anyway,
4667 so this is not too unrealistic.  */
4668 if (cst_and_fits_in_hwi (cstep))
4669 cstepi = int_cst_value (cstep);
4670 else
4671 cstepi = 0;
4673 if (!constant_multiple_of (ustep, cstep, &rat))
4674 return infinite_cost;
4676 if (wi::fits_shwi_p (rat))
4677 ratio = rat.to_shwi ();
4678 else
4679 return infinite_cost;
4681 STRIP_NOPS (cbase);
4682 ctype = TREE_TYPE (cbase);
4684 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4686 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4687 or ratio == 1, it is better to handle this like
4689 ubase - ratio * cbase + ratio * var
4691 (this also holds in the case ratio == -1, TODO).  */
4693 if (cst_and_fits_in_hwi (cbase))
4695 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4696 cost = difference_cost (data,
4697 ubase, build_int_cst (utype, 0),
4698 &symbol_present, &var_present, &offset,
4699 depends_on);
4700 cost.cost /= avg_loop_niter (data->current_loop);
4702 else if (ratio == 1)
4704 tree real_cbase = cbase;
4706 /* Check to see if any adjustment is needed. */
4707 if (cstepi == 0 && stmt_is_after_inc)
4709 aff_tree real_cbase_aff;
4710 aff_tree cstep_aff;
4712 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4713 &real_cbase_aff);
4714 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4716 aff_combination_add (&real_cbase_aff, &cstep_aff);
4717 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4720 cost = difference_cost (data,
4721 ubase, real_cbase,
4722 &symbol_present, &var_present, &offset,
4723 depends_on);
4724 cost.cost /= avg_loop_niter (data->current_loop);
4726 else if (address_p
4727 && !POINTER_TYPE_P (ctype)
4728 && multiplier_allowed_in_address_p
4729 (ratio, mem_mode,
4730 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4732 if (cstepi == 0 && stmt_is_after_inc)
4734 if (POINTER_TYPE_P (ctype))
4735 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4736 else
4737 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4739 cbase
4740 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4741 cost = difference_cost (data,
4742 ubase, cbase,
4743 &symbol_present, &var_present, &offset,
4744 depends_on);
4745 cost.cost /= avg_loop_niter (data->current_loop);
4747 else
4749 cost = force_var_cost (data, cbase, depends_on);
4750 cost = add_costs (cost,
4751 difference_cost (data,
4752 ubase, build_int_cst (utype, 0),
4753 &symbol_present, &var_present,
4754 &offset, depends_on));
4755 cost.cost /= avg_loop_niter (data->current_loop);
4756 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4759 /* The set of invariants depended on by a sub-use has already been computed
4760 for the first use in the group.  */
4761 if (use->sub_id)
4763 cost.cost = 0;
4764 if (depends_on && *depends_on)
4765 bitmap_clear (*depends_on);
4767 else if (inv_expr_id)
4769 *inv_expr_id =
4770 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4771 /* Clear depends on. */
4772 if (*inv_expr_id != -1 && depends_on && *depends_on)
4773 bitmap_clear (*depends_on);
4776 /* If we are after the increment, the value of the candidate is higher by
4777 one iteration. */
4778 if (stmt_is_after_inc)
4779 offset -= ratio * cstepi;
4781 /* Now the computation has the shape symbol + var1 + const + ratio * var2.
4782 (symbol/var1/const parts may be omitted). If we are looking for an
4783 address, find the cost of addressing this. */
4784 if (address_p)
4785 return add_costs (cost,
4786 get_address_cost (symbol_present, var_present,
4787 offset, ratio, cstepi,
4788 mem_mode,
4789 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4790 speed, stmt_is_after_inc,
4791 can_autoinc));
4793 /* Otherwise estimate the costs for computing the expression. */
4794 if (!symbol_present && !var_present && !offset)
4796 if (ratio != 1)
4797 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4798 return cost;
4801 /* Symbol + offset should be compile-time computable, so consider that they
4802 are added once to the variable, if present. */
4803 if (var_present && (symbol_present || offset))
4804 cost.cost += adjust_setup_cost (data,
4805 add_cost (speed, TYPE_MODE (ctype)));
4807 /* Having offset does not affect runtime cost in case it is added to
4808 symbol, but it increases complexity. */
4809 if (offset)
4810 cost.complexity++;
4812 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4814 aratio = ratio > 0 ? ratio : -ratio;
4815 if (aratio != 1)
4816 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4817 return cost;
4819 fallback:
4820 if (can_autoinc)
4821 *can_autoinc = false;
4824 /* Just get the expression, expand it and measure the cost. */
4825 tree comp = get_computation_at (data->current_loop, use, cand, at);
4827 if (!comp)
4828 return infinite_cost;
4830 if (address_p)
4831 comp = build_simple_mem_ref (comp);
4833 return new_cost (computation_cost (comp, speed), 0);
4837 /* Determines the cost of the computation by which USE is expressed
4838 from induction variable CAND.  If ADDRESS_P is true, we just need
4839 to create an address from it, otherwise we want to get it into a
4840 register.  A set of invariants we depend on is stored in
4841 DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4842 autoinc addressing is likely. */
4844 static comp_cost
4845 get_computation_cost (struct ivopts_data *data,
4846 struct iv_use *use, struct iv_cand *cand,
4847 bool address_p, bitmap *depends_on,
4848 bool *can_autoinc, int *inv_expr_id)
4850 return get_computation_cost_at (data,
4851 use, cand, address_p, depends_on, use->stmt,
4852 can_autoinc, inv_expr_id);
4855 /* Determines cost of basing replacement of USE on CAND in a generic
4856 expression. */
4858 static bool
4859 determine_use_iv_cost_generic (struct ivopts_data *data,
4860 struct iv_use *use, struct iv_cand *cand)
4862 bitmap depends_on;
4863 comp_cost cost;
4864 int inv_expr_id = -1;
4866 /* The simple case first -- if we need to express the value of the preserved
4867 original biv, the cost is 0. This also prevents us from counting the
4868 cost of increment twice -- once at this use and once in the cost of
4869 the candidate. */
4870 if (cand->pos == IP_ORIGINAL
4871 && cand->incremented_at == use->stmt)
4873 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4874 ERROR_MARK, -1);
4875 return true;
4878 cost = get_computation_cost (data, use, cand, false, &depends_on,
4879 NULL, &inv_expr_id);
4881 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4882 inv_expr_id);
4884 return !infinite_cost_p (cost);
4887 /* Determines cost of basing replacement of USE on CAND in an address. */
4889 static bool
4890 determine_use_iv_cost_address (struct ivopts_data *data,
4891 struct iv_use *use, struct iv_cand *cand)
4893 bitmap depends_on;
4894 bool can_autoinc;
4895 int inv_expr_id = -1;
4896 struct iv_use *sub_use;
4897 comp_cost sub_cost;
4898 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4899 &can_autoinc, &inv_expr_id);
4901 if (cand->ainc_use == use)
4903 if (can_autoinc)
4904 cost.cost -= cand->cost_step;
4905 /* If we generated the candidate solely for exploiting autoincrement
4906 opportunities, and it turns out it can't be used, set the cost to
4907 infinity to make sure we ignore it. */
4908 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4909 cost = infinite_cost;
4911 for (sub_use = use->next;
4912 sub_use && !infinite_cost_p (cost);
4913 sub_use = sub_use->next)
4915 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4916 &can_autoinc, &inv_expr_id);
4917 cost = add_costs (cost, sub_cost);
4920 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4921 inv_expr_id);
4923 return !infinite_cost_p (cost);
4926 /* Computes value of candidate CAND at position AT in iteration NITER, and
4927 stores it to VAL. */
4929 static void
4930 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4931 aff_tree *val)
4933 aff_tree step, delta, nit;
4934 struct iv *iv = cand->iv;
4935 tree type = TREE_TYPE (iv->base);
4936 tree steptype = unsigned_type_for (type);
4941 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4942 aff_combination_convert (&step, steptype);
4943 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4944 aff_combination_convert (&nit, steptype);
4945 aff_combination_mult (&nit, &step, &delta);
4946 if (stmt_after_increment (loop, cand, at))
4947 aff_combination_add (&delta, &step);
4949 tree_to_aff_combination (iv->base, type, val);
4950 if (!POINTER_TYPE_P (type))
4951 aff_combination_convert (val, steptype);
4952 aff_combination_add (val, &delta);
4955 /* Returns period of induction variable iv. */
4957 static tree
4958 iv_period (struct iv *iv)
4960 tree step = iv->step, period, type;
4961 tree pow2div;
4963 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4965 type = unsigned_type_for (TREE_TYPE (step));
4966 /* Period of the iv is lcm (step, type_range)/step - 1,
4967 i.e., N*type_range/step - 1.  Since type_range is a power
4968 of two, N == step >> num_of_ending_zeros_binary (step),
4969 so the final result is
4971 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
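/* For instance, for an 8-bit unsigned iv with step 12,
   num_of_ending_zeros_binary (12) == 2, so the period is
   (256 >> 2) - 1 == 63: the iv runs through 64 distinct values before
   wrapping, hence an exit test based on it can distinguish up to 63
   iterations.  */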
4974 pow2div = num_ending_zeros (step);
4976 period = build_low_bits_mask (type,
4977 (TYPE_PRECISION (type)
4978 - tree_to_uhwi (pow2div)));
4980 return period;
4983 /* Returns the comparison operator used when eliminating the iv USE. */
4985 static enum tree_code
4986 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4988 struct loop *loop = data->current_loop;
4989 basic_block ex_bb;
4990 edge exit;
4992 ex_bb = gimple_bb (use->stmt);
4993 exit = EDGE_SUCC (ex_bb, 0);
4994 if (flow_bb_inside_loop_p (loop, exit->dest))
4995 exit = EDGE_SUCC (ex_bb, 1);
4997 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5000 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5001 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5002 calculation is performed in a non-wrapping type.
5004 TODO: More generally, we could test for the situation that
5005 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5006 This would require knowing the sign of OFFSET. */
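/* For example, if BASE is an SSA name defined by base = start + off and
   OFFSET is that same off, then BASE - OFFSET == start, which cannot
   overflow because base itself was computed in a non-wrapping type.  */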
5008 static bool
5009 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5011 enum tree_code code;
5012 tree e1, e2;
5013 aff_tree aff_e1, aff_e2, aff_offset;
5015 if (!nowrap_type_p (TREE_TYPE (base)))
5016 return false;
5018 base = expand_simple_operations (base);
5020 if (TREE_CODE (base) == SSA_NAME)
5022 gimple *stmt = SSA_NAME_DEF_STMT (base);
5024 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5025 return false;
5027 code = gimple_assign_rhs_code (stmt);
5028 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5029 return false;
5031 e1 = gimple_assign_rhs1 (stmt);
5032 e2 = gimple_assign_rhs2 (stmt);
5034 else
5036 code = TREE_CODE (base);
5037 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5038 return false;
5039 e1 = TREE_OPERAND (base, 0);
5040 e2 = TREE_OPERAND (base, 1);
5043 /* Use affine expansion as deeper inspection to prove the equality. */
5044 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5045 &aff_e2, &data->name_expansion_cache);
5046 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5047 &aff_offset, &data->name_expansion_cache);
5048 aff_combination_scale (&aff_offset, -1);
5049 switch (code)
5051 case PLUS_EXPR:
5052 aff_combination_add (&aff_e2, &aff_offset);
5053 if (aff_combination_zero_p (&aff_e2))
5054 return true;
5056 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5057 &aff_e1, &data->name_expansion_cache);
5058 aff_combination_add (&aff_e1, &aff_offset);
5059 return aff_combination_zero_p (&aff_e1);
5061 case POINTER_PLUS_EXPR:
5062 aff_combination_add (&aff_e2, &aff_offset);
5063 return aff_combination_zero_p (&aff_e2);
5065 default:
5066 return false;
5070 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5071 comparison with CAND.  NITER describes the number of iterations of
5072 the loop.  If successful, the comparison in COMP_P is altered accordingly.
5074 We aim to handle the following situation:
5076 sometype *base, *p;
5077 int a, b, i;
5079 i = a;
5080 p = p_0 = base + a;
5082 do
5083 {
5084 bla (*p);
5085 p++;
5086 i++;
5087 }
5088 while (i < b);
5090 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5091 We aim to optimize this to
5093 p = p_0 = base + a;
5094 do
5095 {
5096 bla (*p);
5097 p++;
5098 }
5099 while (p < p_0 - a + b);
5101 This preserves correctness, since the pointer arithmetic does not
5102 overflow. More precisely:
5104 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5105 overflow in computing it or the values of p.
5106 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5107 overflow. To prove this, we use the fact that p_0 = base + a. */
5109 static bool
5110 iv_elimination_compare_lt (struct ivopts_data *data,
5111 struct iv_cand *cand, enum tree_code *comp_p,
5112 struct tree_niter_desc *niter)
5114 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5115 struct aff_tree nit, tmpa, tmpb;
5116 enum tree_code comp;
5117 HOST_WIDE_INT step;
5119 /* We need to know that the candidate induction variable does not overflow.
5120 While more complex analysis may be used to prove this, for now just
5121 check that the variable appears in the original program and that it
5122 is computed in a type that guarantees no overflows. */
5123 cand_type = TREE_TYPE (cand->iv->base);
5124 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5125 return false;
5127 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5128 the calculation of the BOUND could overflow, making the comparison
5129 invalid. */
5130 if (!data->loop_single_exit_p)
5131 return false;
5133 /* We need to be able to decide whether the candidate is increasing or decreasing
5134 in order to choose the right comparison operator. */
5135 if (!cst_and_fits_in_hwi (cand->iv->step))
5136 return false;
5137 step = int_cst_value (cand->iv->step);
5139 /* Check that the number of iterations matches the expected pattern:
5140 a + 1 > b ? 0 : b - a - 1. */
5141 mbz = niter->may_be_zero;
5142 if (TREE_CODE (mbz) == GT_EXPR)
5144 /* Handle a + 1 > b. */
5145 tree op0 = TREE_OPERAND (mbz, 0);
5146 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5148 a = TREE_OPERAND (op0, 0);
5149 b = TREE_OPERAND (mbz, 1);
5151 else
5152 return false;
5154 else if (TREE_CODE (mbz) == LT_EXPR)
5156 tree op1 = TREE_OPERAND (mbz, 1);
5158 /* Handle b < a + 1. */
5159 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5161 a = TREE_OPERAND (op1, 0);
5162 b = TREE_OPERAND (mbz, 0);
5164 else
5165 return false;
5167 else
5168 return false;
5170 /* Expected number of iterations is B - A - 1. Check that it matches
5171 the actual number, i.e., that B - A - NITER = 1. */
5172 tree_to_aff_combination (niter->niter, nit_type, &nit);
5173 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5174 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5175 aff_combination_scale (&nit, -1);
5176 aff_combination_scale (&tmpa, -1);
5177 aff_combination_add (&tmpb, &tmpa);
5178 aff_combination_add (&tmpb, &nit);
5179 if (tmpb.n != 0 || tmpb.offset != 1)
5180 return false;
5182 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5183 overflow. */
5184 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5185 cand->iv->step,
5186 fold_convert (TREE_TYPE (cand->iv->step), a));
5187 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5188 return false;
5190 /* Determine the new comparison operator. */
5191 comp = step < 0 ? GT_EXPR : LT_EXPR;
5192 if (*comp_p == NE_EXPR)
5193 *comp_p = comp;
5194 else if (*comp_p == EQ_EXPR)
5195 *comp_p = invert_tree_comparison (comp, false);
5196 else
5197 gcc_unreachable ();
5199 return true;
5202 /* Check whether it is possible to express the condition in USE by comparison
5203 of candidate CAND. If so, store the value compared with to BOUND, and the
5204 comparison operator to COMP. */
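/* A hypothetical example: an exit test i != 100 on an iv with step 1
   may be replaced by a test against a pointer candidate p with step 4;
   BOUND becomes the value of p after the loop's iterations (computed by
   cand_value_at) and COMP the operator from iv_elimination_compare,
   possibly turned into < by iv_elimination_compare_lt below.  */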
5206 static bool
5207 may_eliminate_iv (struct ivopts_data *data,
5208 struct iv_use *use, struct iv_cand *cand, tree *bound,
5209 enum tree_code *comp)
5211 basic_block ex_bb;
5212 edge exit;
5213 tree period;
5214 struct loop *loop = data->current_loop;
5215 aff_tree bnd;
5216 struct tree_niter_desc *desc = NULL;
5218 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5219 return false;
5221 /* For now this works only for exits that dominate the loop latch.
5222 TODO: extend to other conditions inside loop body. */
5223 ex_bb = gimple_bb (use->stmt);
5224 if (use->stmt != last_stmt (ex_bb)
5225 || gimple_code (use->stmt) != GIMPLE_COND
5226 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5227 return false;
5229 exit = EDGE_SUCC (ex_bb, 0);
5230 if (flow_bb_inside_loop_p (loop, exit->dest))
5231 exit = EDGE_SUCC (ex_bb, 1);
5232 if (flow_bb_inside_loop_p (loop, exit->dest))
5233 return false;
5235 desc = niter_for_exit (data, exit);
5236 if (!desc)
5237 return false;
5239 /* Determine whether we can use the variable to test the exit condition.
5240 This is the case iff the period of the induction variable is greater
5241 than the number of iterations for which the exit condition is true. */
5242 period = iv_period (cand->iv);
5244 /* If the number of iterations is constant, compare against it directly. */
5245 if (TREE_CODE (desc->niter) == INTEGER_CST)
5247 /* See cand_value_at. */
5248 if (stmt_after_increment (loop, cand, use->stmt))
5250 if (!tree_int_cst_lt (desc->niter, period))
5251 return false;
5253 else
5255 if (tree_int_cst_lt (period, desc->niter))
5256 return false;
5260 /* If not, and if this is the only possible exit of the loop, see whether
5261 we can get a conservative estimate of the number of iterations of the
5262 entire loop and compare against that instead. */
5263 else
5265 widest_int period_value, max_niter;
5267 max_niter = desc->max;
5268 if (stmt_after_increment (loop, cand, use->stmt))
5269 max_niter += 1;
5270 period_value = wi::to_widest (period);
5271 if (wi::gtu_p (max_niter, period_value))
5273 /* See if we can take advantage of inferred loop bound information. */
5274 if (data->loop_single_exit_p)
5276 if (!max_loop_iterations (loop, &max_niter))
5277 return false;
5278 /* The loop bound is already adjusted by adding 1. */
5279 if (wi::gtu_p (max_niter, period_value))
5280 return false;
5282 else
5283 return false;
5287 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5289 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5290 aff_combination_to_tree (&bnd));
5291 *comp = iv_elimination_compare (data, use);
5293 /* It is unlikely that computing the number of iterations using division
5294 would be more profitable than keeping the original induction variable. */
5295 if (expression_expensive_p (*bound))
5296 return false;
5298 /* Sometimes, it is possible to handle the situation that the number of
5299 iterations may be zero unless additional assumptions hold, by using <
5300 instead of != in the exit condition.
5302 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5303 base the exit condition on it. However, that is often too
5304 expensive. */
5305 if (!integer_zerop (desc->may_be_zero))
5306 return iv_elimination_compare_lt (data, cand, comp, desc);
5308 return true;
5311 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5312 be copied if it is used in the loop body and DATA->body_includes_call.  */
5314 static int
5315 parm_decl_cost (struct ivopts_data *data, tree bound)
5317 tree sbound = bound;
5318 STRIP_NOPS (sbound);
5320 if (TREE_CODE (sbound) == SSA_NAME
5321 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5322 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5323 && data->body_includes_call)
5324 return COSTS_N_INSNS (1);
5326 return 0;
5329 /* Determines cost of basing replacement of USE on CAND in a condition. */
5331 static bool
5332 determine_use_iv_cost_condition (struct ivopts_data *data,
5333 struct iv_use *use, struct iv_cand *cand)
5335 tree bound = NULL_TREE;
5336 struct iv *cmp_iv;
5337 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5338 comp_cost elim_cost, express_cost, cost, bound_cost;
5339 bool ok;
5340 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5341 tree *control_var, *bound_cst;
5342 enum tree_code comp = ERROR_MARK;
5344 /* Only consider real candidates. */
5345 if (!cand->iv)
5347 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5348 ERROR_MARK, -1);
5349 return false;
5352 /* Try iv elimination. */
5353 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5355 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5356 if (elim_cost.cost == 0)
5357 elim_cost.cost = parm_decl_cost (data, bound);
5358 else if (TREE_CODE (bound) == INTEGER_CST)
5359 elim_cost.cost = 0;
5360 /* If we replace a loop condition 'i < n' with 'p < base + n',
5361 depends_on_elim will have 'base' and 'n' set, which implies
5362 that both 'base' and 'n' will be live during the loop. More likely,
5363 'base + n' will be loop invariant, resulting in only one live value
5364 during the loop. So in that case we clear depends_on_elim and set
5365 elim_inv_expr_id instead. */
5366 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5368 elim_inv_expr_id = get_expr_id (data, bound);
5369 bitmap_clear (depends_on_elim);
5371 /* The bound is a loop invariant, so it will be computed
5372 only once.  */
5373 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5375 else
5376 elim_cost = infinite_cost;
5378 /* Try expressing the original giv. If it is compared with an invariant,
5379 note that we cannot get rid of it. */
5380 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5381 NULL, &cmp_iv);
5382 gcc_assert (ok);
5384 /* When the condition is a comparison of the candidate IV against
5385 zero, prefer this IV.
5387 TODO: The constant that we're subtracting from the cost should
5388 be target-dependent. This information should be added to the
5389 target costs for each backend. */
5390 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5391 && integer_zerop (*bound_cst)
5392 && (operand_equal_p (*control_var, cand->var_after, 0)
5393 || operand_equal_p (*control_var, cand->var_before, 0)))
5394 elim_cost.cost -= 1;
5396 express_cost = get_computation_cost (data, use, cand, false,
5397 &depends_on_express, NULL,
5398 &express_inv_expr_id);
5399 fd_ivopts_data = data;
5400 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5402 /* Count the cost of the original bound as well. */
5403 bound_cost = force_var_cost (data, *bound_cst, NULL);
5404 if (bound_cost.cost == 0)
5405 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5406 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5407 bound_cost.cost = 0;
5408 express_cost.cost += bound_cost.cost;
5410 /* Choose the better approach, preferring the eliminated IV. */
5411 if (compare_costs (elim_cost, express_cost) <= 0)
5413 cost = elim_cost;
5414 depends_on = depends_on_elim;
5415 depends_on_elim = NULL;
5416 inv_expr_id = elim_inv_expr_id;
5418 else
5420 cost = express_cost;
5421 depends_on = depends_on_express;
5422 depends_on_express = NULL;
5423 bound = NULL_TREE;
5424 comp = ERROR_MARK;
5425 inv_expr_id = express_inv_expr_id;
5428 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5430 if (depends_on_elim)
5431 BITMAP_FREE (depends_on_elim);
5432 if (depends_on_express)
5433 BITMAP_FREE (depends_on_express);
5435 return !infinite_cost_p (cost);
5438 /* Determines cost of basing replacement of USE on CAND. Returns false
5439 if USE cannot be based on CAND. */
5441 static bool
5442 determine_use_iv_cost (struct ivopts_data *data,
5443 struct iv_use *use, struct iv_cand *cand)
5445 switch (use->type)
5447 case USE_NONLINEAR_EXPR:
5448 return determine_use_iv_cost_generic (data, use, cand);
5450 case USE_ADDRESS:
5451 return determine_use_iv_cost_address (data, use, cand);
5453 case USE_COMPARE:
5454 return determine_use_iv_cost_condition (data, use, cand);
5456 default:
5457 gcc_unreachable ();
5461 /* Return true if get_computation_cost indicates that autoincrement is
5462 a possibility for the pair of USE and CAND, false otherwise. */
5464 static bool
5465 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5466 struct iv_cand *cand)
5468 bitmap depends_on;
5469 bool can_autoinc;
5470 comp_cost cost;
5472 if (use->type != USE_ADDRESS)
5473 return false;
5475 cost = get_computation_cost (data, use, cand, true, &depends_on,
5476 &can_autoinc, NULL);
5478 BITMAP_FREE (depends_on);
5480 return !infinite_cost_p (cost) && can_autoinc;
5483 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5484 use that allows autoincrement, and set their AINC_USE if possible. */
5486 static void
5487 set_autoinc_for_original_candidates (struct ivopts_data *data)
5489 unsigned i, j;
5491 for (i = 0; i < n_iv_cands (data); i++)
5493 struct iv_cand *cand = iv_cand (data, i);
5494 struct iv_use *closest_before = NULL;
5495 struct iv_use *closest_after = NULL;
5496 if (cand->pos != IP_ORIGINAL)
5497 continue;
5499 for (j = 0; j < n_iv_uses (data); j++)
5501 struct iv_use *use = iv_use (data, j);
5502 unsigned uid = gimple_uid (use->stmt);
5504 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5505 continue;
5507 if (uid < gimple_uid (cand->incremented_at)
5508 && (closest_before == NULL
5509 || uid > gimple_uid (closest_before->stmt)))
5510 closest_before = use;
5512 if (uid > gimple_uid (cand->incremented_at)
5513 && (closest_after == NULL
5514 || uid < gimple_uid (closest_after->stmt)))
5515 closest_after = use;
5518 if (closest_before != NULL
5519 && autoinc_possible_for_pair (data, closest_before, cand))
5520 cand->ainc_use = closest_before;
5521 else if (closest_after != NULL
5522 && autoinc_possible_for_pair (data, closest_after, cand))
5523 cand->ainc_use = closest_after;
5527 /* Finds the candidates for the induction variables. */
5529 static void
5530 find_iv_candidates (struct ivopts_data *data)
5532 /* Add commonly used ivs. */
5533 add_standard_iv_candidates (data);
5535 /* Add old induction variables. */
5536 add_iv_candidate_for_bivs (data);
5538 /* Add induction variables derived from uses. */
5539 add_iv_candidate_for_uses (data);
5541 set_autoinc_for_original_candidates (data);
5543 /* Record the important candidates. */
5544 record_important_candidates (data);
5547 /* Determines costs of basing the use of the iv on an iv candidate. */
5549 static void
5550 determine_use_iv_costs (struct ivopts_data *data)
5552 unsigned i, j;
5553 struct iv_use *use;
5554 struct iv_cand *cand;
5555 bitmap to_clear = BITMAP_ALLOC (NULL);
5557 alloc_use_cost_map (data);
5559 for (i = 0; i < n_iv_uses (data); i++)
5561 use = iv_use (data, i);
5563 if (data->consider_all_candidates)
5565 for (j = 0; j < n_iv_cands (data); j++)
5567 cand = iv_cand (data, j);
5568 determine_use_iv_cost (data, use, cand);
5571 else
5573 bitmap_iterator bi;
5575 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5577 cand = iv_cand (data, j);
5578 if (!determine_use_iv_cost (data, use, cand))
5579 bitmap_set_bit (to_clear, j);
5582 /* Remove the candidates for which the cost is infinite from
5583 the list of related candidates. */
5584 bitmap_and_compl_into (use->related_cands, to_clear);
5585 bitmap_clear (to_clear);
5589 BITMAP_FREE (to_clear);
5591 if (dump_file && (dump_flags & TDF_DETAILS))
5593 fprintf (dump_file, "Use-candidate costs:\n");
5595 for (i = 0; i < n_iv_uses (data); i++)
5597 use = iv_use (data, i);
5599 fprintf (dump_file, "Use %d:\n", i);
5600 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5601 for (j = 0; j < use->n_map_members; j++)
5603 if (!use->cost_map[j].cand
5604 || infinite_cost_p (use->cost_map[j].cost))
5605 continue;
5607 fprintf (dump_file, " %d\t%d\t%d\t",
5608 use->cost_map[j].cand->id,
5609 use->cost_map[j].cost.cost,
5610 use->cost_map[j].cost.complexity);
5611 if (use->cost_map[j].depends_on)
5612 bitmap_print (dump_file,
5613 use->cost_map[j].depends_on, "","");
5614 if (use->cost_map[j].inv_expr_id != -1)
5615 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5616 fprintf (dump_file, "\n");
5619 fprintf (dump_file, "\n");
5621 fprintf (dump_file, "\n");
5625 /* Determines cost of the candidate CAND. */
5627 static void
5628 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5630 comp_cost cost_base;
5631 unsigned cost, cost_step;
5632 tree base;
5634 if (!cand->iv)
5636 cand->cost = 0;
5637 return;
5640 /* There are two costs associated with the candidate -- its increment
5641 and its initialization. The second is almost negligible for any loop
5642 that rolls enough, so we give it only very little weight.  */
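/* E.g., with a hypothetical setup cost of 3 and a loop averaging 10
   iterations, adjust_setup_cost (when optimizing for speed) charges
   only about a tenth of it, so the per-iteration increment cost
   dominates the candidate's total.  */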
5644 base = cand->iv->base;
5645 cost_base = force_var_cost (data, base, NULL);
5646 /* It will be exceptional that the iv register happens to be initialized with
5647 the proper value at no cost. In general, there will at least be a regcopy
5648 or a const set. */
5649 if (cost_base.cost == 0)
5650 cost_base.cost = COSTS_N_INSNS (1);
5651 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5653 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5655 /* Prefer the original ivs unless we may gain something by replacing them.
5656 The reason is to make debugging simpler, so this is not relevant for
5657 artificial ivs created by other optimization passes. */
5658 if (cand->pos != IP_ORIGINAL
5659 || !SSA_NAME_VAR (cand->var_before)
5660 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5661 cost++;
5663 /* Prefer not to insert statements into the latch unless there are some
5664 already (so that we do not create unnecessary jumps). */
5665 if (cand->pos == IP_END
5666 && empty_block_p (ip_end_pos (data->current_loop)))
5667 cost++;
5669 cand->cost = cost;
5670 cand->cost_step = cost_step;
5673 /* Determines costs of computation of the candidates. */
5675 static void
5676 determine_iv_costs (struct ivopts_data *data)
5678 unsigned i;
5680 if (dump_file && (dump_flags & TDF_DETAILS))
5682 fprintf (dump_file, "Candidate costs:\n");
5683 fprintf (dump_file, " cand\tcost\n");
5686 for (i = 0; i < n_iv_cands (data); i++)
5688 struct iv_cand *cand = iv_cand (data, i);
5690 determine_iv_cost (data, cand);
5692 if (dump_file && (dump_flags & TDF_DETAILS))
5693 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5696 if (dump_file && (dump_flags & TDF_DETAILS))
5697 fprintf (dump_file, "\n");
5700 /* Calculates cost for having SIZE induction variables. */
5702 static unsigned
5703 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5705 /* We add size to the cost, so that we prefer eliminating ivs
5706 if possible. */
5707 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5708 data->body_includes_call);
5711 /* For each size of the induction variable set determine the penalty. */
5713 static void
5714 determine_set_costs (struct ivopts_data *data)
5716 unsigned j, n;
5717 gphi *phi;
5718 gphi_iterator psi;
5719 tree op;
5720 struct loop *loop = data->current_loop;
5721 bitmap_iterator bi;
5723 if (dump_file && (dump_flags & TDF_DETAILS))
5725 fprintf (dump_file, "Global costs:\n");
5726 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5727 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5728 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5729 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5732 n = 0;
5733 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5735 phi = psi.phi ();
5736 op = PHI_RESULT (phi);
5738 if (virtual_operand_p (op))
5739 continue;
5741 if (get_iv (data, op))
5742 continue;
5744 n++;
5747 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5749 struct version_info *info = ver_info (data, j);
5751 if (info->inv_id && info->has_nonlin_use)
5752 n++;
5755 data->regs_used = n;
5756 if (dump_file && (dump_flags & TDF_DETAILS))
5757 fprintf (dump_file, " regs_used %d\n", n);
5759 if (dump_file && (dump_flags & TDF_DETAILS))
5761 fprintf (dump_file, " cost for size:\n");
5762 fprintf (dump_file, " ivs\tcost\n");
5763 for (j = 0; j <= 2 * target_avail_regs; j++)
5764 fprintf (dump_file, " %d\t%d\n", j,
5765 ivopts_global_cost_for_size (data, j));
5766 fprintf (dump_file, "\n");
5770 /* Returns true if A is a cheaper cost pair than B. */
5772 static bool
5773 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5775 int cmp;
5777 if (!a)
5778 return false;
5780 if (!b)
5781 return true;
5783 cmp = compare_costs (a->cost, b->cost);
5784 if (cmp < 0)
5785 return true;
5787 if (cmp > 0)
5788 return false;
5790 /* In case the costs are the same, prefer the cheaper candidate. */
5791 if (a->cand->cost < b->cand->cost)
5792 return true;
5794 return false;
5798 /* Returns the candidate by which USE is expressed in IVS.  */
5800 static struct cost_pair *
5801 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5803 return ivs->cand_for_use[use->id];
5806 /* Computes the cost field of IVS structure. */
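/* The total is cand_use_cost (the sum of the chosen cost pairs over all
   uses) plus cand_cost (the selected candidates' own costs) plus the
   global register-pressure cost for n_regs + num_used_inv_expr
   registers; e.g., with hypothetical parts 10, 4 and 6 the recorded
   cost is 20.  */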
5808 static void
5809 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5811 comp_cost cost = ivs->cand_use_cost;
5813 cost.cost += ivs->cand_cost;
5815 cost.cost += ivopts_global_cost_for_size (data,
5816 ivs->n_regs + ivs->num_used_inv_expr);
5818 ivs->cost = cost;
5821 /* Remove invariants in set INVS from set IVS.  */
5823 static void
5824 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5826 bitmap_iterator bi;
5827 unsigned iid;
5829 if (!invs)
5830 return;
5832 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5834 ivs->n_invariant_uses[iid]--;
5835 if (ivs->n_invariant_uses[iid] == 0)
5836 ivs->n_regs--;
5840 /* Set USE not to be expressed by any candidate in IVS. */
5842 static void
5843 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5844 struct iv_use *use)
5846 unsigned uid = use->id, cid;
5847 struct cost_pair *cp;
5849 cp = ivs->cand_for_use[uid];
5850 if (!cp)
5851 return;
5852 cid = cp->cand->id;
5854 ivs->bad_uses++;
5855 ivs->cand_for_use[uid] = NULL;
5856 ivs->n_cand_uses[cid]--;
5858 if (ivs->n_cand_uses[cid] == 0)
5860 bitmap_clear_bit (ivs->cands, cid);
5861 /* Do not count the pseudocandidates. */
5862 if (cp->cand->iv)
5863 ivs->n_regs--;
5864 ivs->n_cands--;
5865 ivs->cand_cost -= cp->cand->cost;
5867 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5870 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5872 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5874 if (cp->inv_expr_id != -1)
5876 ivs->used_inv_expr[cp->inv_expr_id]--;
5877 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5878 ivs->num_used_inv_expr--;
5880 iv_ca_recount_cost (data, ivs);
5883 /* Add invariants in set INVS to set IVS. */
5885 static void
5886 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5888 bitmap_iterator bi;
5889 unsigned iid;
5891 if (!invs)
5892 return;
5894 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5896 ivs->n_invariant_uses[iid]++;
5897 if (ivs->n_invariant_uses[iid] == 1)
5898 ivs->n_regs++;
5902 /* Set cost pair for USE in set IVS to CP. */
5904 static void
5905 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5906 struct iv_use *use, struct cost_pair *cp)
5908 unsigned uid = use->id, cid;
5910 if (ivs->cand_for_use[uid] == cp)
5911 return;
5913 if (ivs->cand_for_use[uid])
5914 iv_ca_set_no_cp (data, ivs, use);
5916 if (cp)
5918 cid = cp->cand->id;
5920 ivs->bad_uses--;
5921 ivs->cand_for_use[uid] = cp;
5922 ivs->n_cand_uses[cid]++;
5923 if (ivs->n_cand_uses[cid] == 1)
5925 bitmap_set_bit (ivs->cands, cid);
5926 /* Do not count the pseudocandidates. */
5927 if (cp->cand->iv)
5928 ivs->n_regs++;
5929 ivs->n_cands++;
5930 ivs->cand_cost += cp->cand->cost;
5932 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5935 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5936 iv_ca_set_add_invariants (ivs, cp->depends_on);
5938 if (cp->inv_expr_id != -1)
5940 ivs->used_inv_expr[cp->inv_expr_id]++;
5941 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5942 ivs->num_used_inv_expr++;
5944 iv_ca_recount_cost (data, ivs);
5948 /* Extend set IVS by expressing USE by some of the candidates in it
5949 if possible. Consider all important candidates if candidates in
5950 set IVS don't give any result. */
5952 static void
5953 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5954 struct iv_use *use)
5956 struct cost_pair *best_cp = NULL, *cp;
5957 bitmap_iterator bi;
5958 unsigned i;
5959 struct iv_cand *cand;
5961 gcc_assert (ivs->upto >= use->id);
5962 ivs->upto++;
5963 ivs->bad_uses++;
5965 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5967 cand = iv_cand (data, i);
5968 cp = get_use_iv_cost (data, use, cand);
5969 if (cheaper_cost_pair (cp, best_cp))
5970 best_cp = cp;
5973 if (best_cp == NULL)
5975 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5977 cand = iv_cand (data, i);
5978 cp = get_use_iv_cost (data, use, cand);
5979 if (cheaper_cost_pair (cp, best_cp))
5980 best_cp = cp;
5984 iv_ca_set_cp (data, ivs, use, best_cp);
5987 /* Get cost for assignment IVS. */
5989 static comp_cost
5990 iv_ca_cost (struct iv_ca *ivs)
5992 /* This was a conditional expression but it triggered a bug in
5993 Sun C 5.5. */
5994 if (ivs->bad_uses)
5995 return infinite_cost;
5996 else
5997 return ivs->cost;
6000 /* Returns true if all dependences of CP are among invariants in IVS. */
6002 static bool
6003 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6005 unsigned i;
6006 bitmap_iterator bi;
6008 if (!cp->depends_on)
6009 return true;
6011 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6013 if (ivs->n_invariant_uses[i] == 0)
6014 return false;
6017 return true;
6020 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6021 it before NEXT_CHANGE. */
6023 static struct iv_ca_delta *
6024 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6025 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6027 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6029 change->use = use;
6030 change->old_cp = old_cp;
6031 change->new_cp = new_cp;
6032 change->next_change = next_change;
6034 return change;
6037 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6038 are rewritten. */
6040 static struct iv_ca_delta *
6041 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6043 struct iv_ca_delta *last;
6045 if (!l2)
6046 return l1;
6048 if (!l1)
6049 return l2;
6051 for (last = l1; last->next_change; last = last->next_change)
6052 continue;
6053 last->next_change = l2;
6055 return l1;
6058 /* Reverse the list of changes DELTA, forming the inverse to it. */
6060 static struct iv_ca_delta *
6061 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6063 struct iv_ca_delta *act, *next, *prev = NULL;
6065 for (act = delta; act; act = next)
6067 next = act->next_change;
6068 act->next_change = prev;
6069 prev = act;
6071 std::swap (act->old_cp, act->new_cp);
6074 return prev;
6077 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6078 reverted instead. */
6080 static void
6081 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6082 struct iv_ca_delta *delta, bool forward)
6084 struct cost_pair *from, *to;
6085 struct iv_ca_delta *act;
6087 if (!forward)
6088 delta = iv_ca_delta_reverse (delta);
6090 for (act = delta; act; act = act->next_change)
6092 from = act->old_cp;
6093 to = act->new_cp;
6094 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6095 iv_ca_set_cp (data, ivs, act->use, to);
6098 if (!forward)
6099 iv_ca_delta_reverse (delta);
6102 /* Returns true if CAND is used in IVS. */
6104 static bool
6105 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6107 return ivs->n_cand_uses[cand->id] > 0;
6110 /* Returns number of induction variable candidates in the set IVS. */
6112 static unsigned
6113 iv_ca_n_cands (struct iv_ca *ivs)
6115 return ivs->n_cands;
6118 /* Free the list of changes DELTA. */
6120 static void
6121 iv_ca_delta_free (struct iv_ca_delta **delta)
6123 struct iv_ca_delta *act, *next;
6125 for (act = *delta; act; act = next)
6127 next = act->next_change;
6128 free (act);
6131 *delta = NULL;
6134 /* Allocates a new iv candidate assignment.  */
6136 static struct iv_ca *
6137 iv_ca_new (struct ivopts_data *data)
6139 struct iv_ca *nw = XNEW (struct iv_ca);
6141 nw->upto = 0;
6142 nw->bad_uses = 0;
6143 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6144 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6145 nw->cands = BITMAP_ALLOC (NULL);
6146 nw->n_cands = 0;
6147 nw->n_regs = 0;
6148 nw->cand_use_cost = no_cost;
6149 nw->cand_cost = 0;
6150 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6151 nw->cost = no_cost;
6152 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6153 nw->num_used_inv_expr = 0;
6155 return nw;
6158 /* Free memory occupied by the set IVS. */
6160 static void
6161 iv_ca_free (struct iv_ca **ivs)
6163 free ((*ivs)->cand_for_use);
6164 free ((*ivs)->n_cand_uses);
6165 BITMAP_FREE ((*ivs)->cands);
6166 free ((*ivs)->n_invariant_uses);
6167 free ((*ivs)->used_inv_expr);
6168 free (*ivs);
6169 *ivs = NULL;
6172 /* Dumps IVS to FILE. */
6174 static void
6175 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6177 const char *pref = " invariants ";
6178 unsigned i;
6179 comp_cost cost = iv_ca_cost (ivs);
6181 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6182 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6183 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6184 bitmap_print (file, ivs->cands, " candidates: ","\n");
6186 for (i = 0; i < ivs->upto; i++)
6188 struct iv_use *use = iv_use (data, i);
6189 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6190 if (cp)
6191 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6192 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6193 else
6194 fprintf (file, " use:%d --> ??\n", use->id);
6197 for (i = 1; i <= data->max_inv_id; i++)
6198 if (ivs->n_invariant_uses[i])
6200 fprintf (file, "%s%d", pref, i);
6201 pref = ", ";
6203 fprintf (file, "\n\n");
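/* Sample of the dump produced above (made-up numbers; candidate lists
   are printed by bitmap_print):

     cost: 17 (complexity 2)
     cand_cost: 8
     cand_use_cost: 9 (complexity 2)
     candidates: 1, 3
     use:0 --> iv_cand:1, cost=(4,1)
     use:1 --> iv_cand:3, cost=(5,1)
     invariants 2, 5  */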
6206 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6207 the new set, and store the differences in DELTA. The number of induction
6208 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6209 true, the function tries to find a solution with a minimal number of iv candidates. */
6211 static comp_cost
6212 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6213 struct iv_cand *cand, struct iv_ca_delta **delta,
6214 unsigned *n_ivs, bool min_ncand)
6216 unsigned i;
6217 comp_cost cost;
6218 struct iv_use *use;
6219 struct cost_pair *old_cp, *new_cp;
6221 *delta = NULL;
6222 for (i = 0; i < ivs->upto; i++)
6224 use = iv_use (data, i);
6225 old_cp = iv_ca_cand_for_use (ivs, use);
6227 if (old_cp
6228 && old_cp->cand == cand)
6229 continue;
6231 new_cp = get_use_iv_cost (data, use, cand);
6232 if (!new_cp)
6233 continue;
6235 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6236 continue;
6238 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6239 continue;
6241 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6244 iv_ca_delta_commit (data, ivs, *delta, true);
6245 cost = iv_ca_cost (ivs);
6246 if (n_ivs)
6247 *n_ivs = iv_ca_n_cands (ivs);
6248 iv_ca_delta_commit (data, ivs, *delta, false);
6250 return cost;
6253 /* Try narrowing set IVS by removing CAND. Return the cost of
6254 the new set and store the differences in DELTA. START is
6255 the candidate with which we start narrowing. */
6257 static comp_cost
6258 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6259 struct iv_cand *cand, struct iv_cand *start,
6260 struct iv_ca_delta **delta)
6262 unsigned i, ci;
6263 struct iv_use *use;
6264 struct cost_pair *old_cp, *new_cp, *cp;
6265 bitmap_iterator bi;
6266 struct iv_cand *cnd;
6267 comp_cost cost, best_cost, acost;
6269 *delta = NULL;
6270 for (i = 0; i < n_iv_uses (data); i++)
6272 use = iv_use (data, i);
6274 old_cp = iv_ca_cand_for_use (ivs, use);
6275 if (old_cp->cand != cand)
6276 continue;
6278 best_cost = iv_ca_cost (ivs);
6279 /* Start narrowing with START. */
6280 new_cp = get_use_iv_cost (data, use, start);
6282 if (data->consider_all_candidates)
6284 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6286 if (ci == cand->id || (start && ci == start->id))
6287 continue;
6289 cnd = iv_cand (data, ci);
6291 cp = get_use_iv_cost (data, use, cnd);
6292 if (!cp)
6293 continue;
6295 iv_ca_set_cp (data, ivs, use, cp);
6296 acost = iv_ca_cost (ivs);
6298 if (compare_costs (acost, best_cost) < 0)
6300 best_cost = acost;
6301 new_cp = cp;
6305 else
6307 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6309 if (ci == cand->id || (start && ci == start->id))
6310 continue;
6312 cnd = iv_cand (data, ci);
6314 cp = get_use_iv_cost (data, use, cnd);
6315 if (!cp)
6316 continue;
6318 iv_ca_set_cp (data, ivs, use, cp);
6319 acost = iv_ca_cost (ivs);
6321 if (compare_costs (acost, best_cost) < 0)
6323 best_cost = acost;
6324 new_cp = cp;
6328 /* Restore the old cp for this use. */
6329 iv_ca_set_cp (data, ivs, use, old_cp);
6331 if (!new_cp)
6333 iv_ca_delta_free (delta);
6334 return infinite_cost;
6337 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6340 iv_ca_delta_commit (data, ivs, *delta, true);
6341 cost = iv_ca_cost (ivs);
6342 iv_ca_delta_commit (data, ivs, *delta, false);
6344 return cost;
6347 /* Try optimizing the set of candidates IVS by removing candidates other
6348 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6349 differences in DELTA. */
6351 static comp_cost
6352 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6353 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6355 bitmap_iterator bi;
6356 struct iv_ca_delta *act_delta, *best_delta;
6357 unsigned i;
6358 comp_cost best_cost, acost;
6359 struct iv_cand *cand;
6361 best_delta = NULL;
6362 best_cost = iv_ca_cost (ivs);
6364 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6366 cand = iv_cand (data, i);
6368 if (cand == except_cand)
6369 continue;
6371 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6373 if (compare_costs (acost, best_cost) < 0)
6375 best_cost = acost;
6376 iv_ca_delta_free (&best_delta);
6377 best_delta = act_delta;
6379 else
6380 iv_ca_delta_free (&act_delta);
6383 if (!best_delta)
6385 *delta = NULL;
6386 return best_cost;
6389 /* Recurse to possibly remove other unnecessary ivs. */
6390 iv_ca_delta_commit (data, ivs, best_delta, true);
6391 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6392 iv_ca_delta_commit (data, ivs, best_delta, false);
6393 *delta = iv_ca_delta_join (best_delta, *delta);
6394 return best_cost;
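/* Illustrative walk-through (hypothetical candidates): starting from
   IVS = {c1, c2, c3}, the loop above may find that narrowing away c2
   (re-homing its uses onto c1 or c3) gives the best cost; the recursive
   call then repeats the search on {c1, c3}.  One call of iv_ca_prune
   can therefore remove several unnecessary candidates, with DELTA
   accumulating all of the changes.  */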
6397 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6398 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6399 the corresponding cost_pair; otherwise just return BEST_CP. */
6401 static struct cost_pair*
6402 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6403 unsigned int cand_idx, struct iv_cand *old_cand,
6404 struct cost_pair *best_cp)
6406 struct iv_cand *cand;
6407 struct cost_pair *cp;
6409 gcc_assert (old_cand != NULL && best_cp != NULL);
6410 if (cand_idx == old_cand->id)
6411 return best_cp;
6413 cand = iv_cand (data, cand_idx);
6414 cp = get_use_iv_cost (data, use, cand);
6415 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6416 return cp;
6418 return best_cp;
6421 /* Try breaking the locally optimal fixed-point for IVS by replacing candidates
6422 that are used by more than one iv use. For each of those candidates,
6423 this function tries to represent the iv uses under that candidate using
6424 other candidates with lower local cost, then tries to prune the new set.
6425 If the new set has lower cost, it returns the new cost after recording
6426 the candidate replacements in the list DELTA. */
6428 static comp_cost
6429 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6430 struct iv_ca_delta **delta)
6432 bitmap_iterator bi, bj;
6433 unsigned int i, j, k;
6434 struct iv_use *use;
6435 struct iv_cand *cand;
6436 comp_cost orig_cost, acost;
6437 struct iv_ca_delta *act_delta, *tmp_delta;
6438 struct cost_pair *old_cp, *best_cp = NULL;
6440 *delta = NULL;
6441 orig_cost = iv_ca_cost (ivs);
6443 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6445 if (ivs->n_cand_uses[i] == 1
6446 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6447 continue;
6449 cand = iv_cand (data, i);
6451 act_delta = NULL;
6452 /* Represent uses under the current candidate using other candidates
6453 with lower local cost. */
6454 for (j = 0; j < ivs->upto; j++)
6456 use = iv_use (data, j);
6457 old_cp = iv_ca_cand_for_use (ivs, use);
6459 if (old_cp->cand != cand)
6460 continue;
6462 best_cp = old_cp;
6463 if (data->consider_all_candidates)
6464 for (k = 0; k < n_iv_cands (data); k++)
6465 best_cp = cheaper_cost_with_cand (data, use, k,
6466 old_cp->cand, best_cp);
6467 else
6468 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6469 best_cp = cheaper_cost_with_cand (data, use, k,
6470 old_cp->cand, best_cp);
6472 if (best_cp == old_cp)
6473 continue;
6475 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6477 /* No need for further pruning. */
6478 if (!act_delta)
6479 continue;
6481 /* Prune the new candidate set. */
6482 iv_ca_delta_commit (data, ivs, act_delta, true);
6483 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6484 iv_ca_delta_commit (data, ivs, act_delta, false);
6485 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6487 if (compare_costs (acost, orig_cost) < 0)
6489 *delta = act_delta;
6490 return acost;
6492 else
6493 iv_ca_delta_free (&act_delta);
6496 return orig_cost;
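/* Illustrative example (hypothetical candidates): suppose candidate c is
   used by uses u1 and u2, while c1 is locally cheaper for u1 and c2 for
   u2.  The loop above builds a delta moving u1 to c1 and u2 to c2,
   prunes the resulting set (which may now drop c entirely) and accepts
   the combined delta only if the total cost decreases.  */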
6499 /* Tries to extend the set IVS in the best possible way in order
6500 to express the USE. If ORIGINALP is true, prefer candidates from
6501 the original set of IVs, otherwise favor important candidates not
6502 based on any memory object. */
6504 static bool
6505 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6506 struct iv_use *use, bool originalp)
6508 comp_cost best_cost, act_cost;
6509 unsigned i;
6510 bitmap_iterator bi;
6511 struct iv_cand *cand;
6512 struct iv_ca_delta *best_delta = NULL, *act_delta;
6513 struct cost_pair *cp;
6515 iv_ca_add_use (data, ivs, use);
6516 best_cost = iv_ca_cost (ivs);
6517 cp = iv_ca_cand_for_use (ivs, use);
6518 if (cp)
6520 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6521 iv_ca_set_no_cp (data, ivs, use);
6524 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6525 first try important candidates not based on any memory object. Only if
6526 this fails, try the specific ones. Rationale -- in loops with many
6527 variables the best choice often is to use just one generic biv. If we
6528 added here many ivs specific to the uses, the optimization algorithm later
6529 would be likely to get stuck in a local minimum, thus causing us to create
6530 too many ivs. The approach from few ivs to more seems more likely to be
6531 successful -- starting from few ivs, replacing an expensive use by a
6532 specific iv should always be a win. */
6533 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6535 cand = iv_cand (data, i);
6537 if (originalp && cand->pos != IP_ORIGINAL)
6538 continue;
6540 if (!originalp && cand->iv->base_object != NULL_TREE)
6541 continue;
6543 if (iv_ca_cand_used_p (ivs, cand))
6544 continue;
6546 cp = get_use_iv_cost (data, use, cand);
6547 if (!cp)
6548 continue;
6550 iv_ca_set_cp (data, ivs, use, cp);
6551 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6552 true);
6553 iv_ca_set_no_cp (data, ivs, use);
6554 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6556 if (compare_costs (act_cost, best_cost) < 0)
6558 best_cost = act_cost;
6560 iv_ca_delta_free (&best_delta);
6561 best_delta = act_delta;
6563 else
6564 iv_ca_delta_free (&act_delta);
6567 if (infinite_cost_p (best_cost))
6569 for (i = 0; i < use->n_map_members; i++)
6571 cp = use->cost_map + i;
6572 cand = cp->cand;
6573 if (!cand)
6574 continue;
6576 /* Already tried this. */
6577 if (cand->important)
6579 if (originalp && cand->pos == IP_ORIGINAL)
6580 continue;
6581 if (!originalp && cand->iv->base_object == NULL_TREE)
6582 continue;
6585 if (iv_ca_cand_used_p (ivs, cand))
6586 continue;
6588 act_delta = NULL;
6589 iv_ca_set_cp (data, ivs, use, cp);
6590 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6591 iv_ca_set_no_cp (data, ivs, use);
6592 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6593 cp, act_delta);
6595 if (compare_costs (act_cost, best_cost) < 0)
6597 best_cost = act_cost;
6599 if (best_delta)
6600 iv_ca_delta_free (&best_delta);
6601 best_delta = act_delta;
6603 else
6604 iv_ca_delta_free (&act_delta);
6608 iv_ca_delta_commit (data, ivs, best_delta, true);
6609 iv_ca_delta_free (&best_delta);
6611 return !infinite_cost_p (best_cost);
6614 /* Finds an initial assignment of candidates to uses. */
6616 static struct iv_ca *
6617 get_initial_solution (struct ivopts_data *data, bool originalp)
6619 struct iv_ca *ivs = iv_ca_new (data);
6620 unsigned i;
6622 for (i = 0; i < n_iv_uses (data); i++)
6623 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6625 iv_ca_free (&ivs);
6626 return NULL;
6629 return ivs;
6632 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6633 points to a bool variable; if it is true, this function also tries to
6634 break the locally optimal fixed-point by replacing candidates in IVS. */
6636 static bool
6637 try_improve_iv_set (struct ivopts_data *data,
6638 struct iv_ca *ivs, bool *try_replace_p)
6640 unsigned i, n_ivs;
6641 comp_cost acost, best_cost = iv_ca_cost (ivs);
6642 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6643 struct iv_cand *cand;
6645 /* Try extending the set of induction variables by one. */
6646 for (i = 0; i < n_iv_cands (data); i++)
6648 cand = iv_cand (data, i);
6650 if (iv_ca_cand_used_p (ivs, cand))
6651 continue;
6653 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6654 if (!act_delta)
6655 continue;
6657 /* If we successfully added the candidate and the set is small enough,
6658 try optimizing it by removing other candidates. */
6659 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6661 iv_ca_delta_commit (data, ivs, act_delta, true);
6662 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6663 iv_ca_delta_commit (data, ivs, act_delta, false);
6664 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6667 if (compare_costs (acost, best_cost) < 0)
6669 best_cost = acost;
6670 iv_ca_delta_free (&best_delta);
6671 best_delta = act_delta;
6673 else
6674 iv_ca_delta_free (&act_delta);
6677 if (!best_delta)
6679 /* Try removing the candidates from the set instead. */
6680 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6682 if (!best_delta && *try_replace_p)
6684 *try_replace_p = false;
6685 /* So far the candidate selection algorithm tends to choose fewer IVs,
6686 which handles cases in which a loop has many variables but the best
6687 choice is to use only one general biv. One weakness is that it can't
6688 handle the opposite case, in which different candidates should be
6689 chosen with respect to each use. To solve this problem, we replace
6690 candidates in the manner described in the comments of iv_ca_replace,
6691 thus giving the general algorithm a chance to break the locally
6692 optimal fixed-point in these cases. */
6693 best_cost = iv_ca_replace (data, ivs, &best_delta);
6696 if (!best_delta)
6697 return false;
6700 iv_ca_delta_commit (data, ivs, best_delta, true);
6701 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6702 iv_ca_delta_free (&best_delta);
6703 return true;
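/* One improvement step, schematically (a sketch of the code above):

     1. for each candidate C not in IVS, evaluate IVS + C (pruning the
        result when the set is small enough) and remember the cheapest
        delta;
     2. if no extension helped, try pruning IVS alone;
     3. if that fails too and *TRY_REPLACE_P is set, try iv_ca_replace
        once and clear the flag.

   The best delta found is committed and the function reports whether
   the cost of IVS improved.  */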
6706 /* Attempts to find the optimal set of induction variables. We use a simple
6707 greedy heuristic -- we try to replace at most one candidate in the selected
6708 solution and remove the unused ivs while this improves the cost. */
6710 static struct iv_ca *
6711 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6713 struct iv_ca *set;
6714 bool try_replace_p = true;
6716 /* Get the initial solution. */
6717 set = get_initial_solution (data, originalp);
6718 if (!set)
6720 if (dump_file && (dump_flags & TDF_DETAILS))
6721 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6722 return NULL;
6725 if (dump_file && (dump_flags & TDF_DETAILS))
6727 fprintf (dump_file, "Initial set of candidates:\n");
6728 iv_ca_dump (data, dump_file, set);
6731 while (try_improve_iv_set (data, set, &try_replace_p))
6733 if (dump_file && (dump_flags & TDF_DETAILS))
6735 fprintf (dump_file, "Improved to:\n");
6736 iv_ca_dump (data, dump_file, set);
6740 return set;
6743 static struct iv_ca *
6744 find_optimal_iv_set (struct ivopts_data *data)
6746 unsigned i;
6747 struct iv_ca *set, *origset;
6748 struct iv_use *use;
6749 comp_cost cost, origcost;
6751 /* Determine the cost based on a strategy that starts with the original IVs,
6752 and then try again using a strategy that prefers candidates not based
6753 on any memory object. */
6754 origset = find_optimal_iv_set_1 (data, true);
6755 set = find_optimal_iv_set_1 (data, false);
6757 if (!origset && !set)
6758 return NULL;
6760 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6761 cost = set ? iv_ca_cost (set) : infinite_cost;
6763 if (dump_file && (dump_flags & TDF_DETAILS))
6765 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6766 origcost.cost, origcost.complexity);
6767 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6768 cost.cost, cost.complexity);
6771 /* Choose the one with the best cost. */
6772 if (compare_costs (origcost, cost) <= 0)
6774 if (set)
6775 iv_ca_free (&set);
6776 set = origset;
6778 else if (origset)
6779 iv_ca_free (&origset);
6781 for (i = 0; i < n_iv_uses (data); i++)
6783 use = iv_use (data, i);
6784 use->selected = iv_ca_cand_for_use (set, use)->cand;
6787 return set;
6790 /* Creates a new induction variable corresponding to CAND. */
6792 static void
6793 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6795 gimple_stmt_iterator incr_pos;
6796 tree base;
6797 bool after = false;
6799 if (!cand->iv)
6800 return;
6802 switch (cand->pos)
6804 case IP_NORMAL:
6805 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6806 break;
6808 case IP_END:
6809 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6810 after = true;
6811 break;
6813 case IP_AFTER_USE:
6814 after = true;
6815 /* fall through */
6816 case IP_BEFORE_USE:
6817 incr_pos = gsi_for_stmt (cand->incremented_at);
6818 break;
6820 case IP_ORIGINAL:
6821 /* Mark that the iv is preserved. */
6822 name_info (data, cand->var_before)->preserve_biv = true;
6823 name_info (data, cand->var_after)->preserve_biv = true;
6825 /* Rewrite the increment so that it uses var_before directly. */
6826 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6827 return;
6830 gimple_add_tmp_var (cand->var_before);
6832 base = unshare_expr (cand->iv->base);
6834 create_iv (base, unshare_expr (cand->iv->step),
6835 cand->var_before, data->current_loop,
6836 &incr_pos, after, &cand->var_before, &cand->var_after);
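/* For reference, where the increment of the new iv ends up, based on
   the switch above:

     IP_NORMAL:        just before the exit test of the loop;
     IP_END:           at the end of the latch block;
     IP_BEFORE_USE and IP_AFTER_USE: next to CAND->incremented_at;
     IP_ORIGINAL:      the original increment is preserved and reused.  */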
6839 /* Creates new induction variables described in SET. */
6841 static void
6842 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6844 unsigned i;
6845 struct iv_cand *cand;
6846 bitmap_iterator bi;
6848 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6850 cand = iv_cand (data, i);
6851 create_new_iv (data, cand);
6854 if (dump_file && (dump_flags & TDF_DETAILS))
6856 fprintf (dump_file, "Selected IV set for loop %d",
6857 data->current_loop->num);
6858 if (data->loop_loc != UNKNOWN_LOCATION)
6859 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6860 LOCATION_LINE (data->loop_loc));
6861 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6862 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6864 cand = iv_cand (data, i);
6865 dump_cand (dump_file, cand);
6867 fprintf (dump_file, "\n");
6871 /* Rewrites USE (definition of iv used in a nonlinear expression)
6872 using candidate CAND. */
6874 static void
6875 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6876 struct iv_use *use, struct iv_cand *cand)
6878 tree comp;
6879 tree op, tgt;
6880 gassign *ass;
6881 gimple_stmt_iterator bsi;
6883 /* An important special case -- if we are asked to express the value of
6884 the original iv by itself, just exit; there is no need to
6885 introduce a new computation (that might also need casting the
6886 variable to unsigned and back). */
6887 if (cand->pos == IP_ORIGINAL
6888 && cand->incremented_at == use->stmt)
6890 enum tree_code stmt_code;
6892 gcc_assert (is_gimple_assign (use->stmt));
6893 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6895 /* Check whether we may leave the computation unchanged.
6896 This is the case only if it does not rely on other
6897 computations in the loop -- otherwise, the computation
6898 we rely upon may be removed in remove_unused_ivs,
6899 thus leading to an ICE. */
6900 stmt_code = gimple_assign_rhs_code (use->stmt);
6901 if (stmt_code == PLUS_EXPR
6902 || stmt_code == MINUS_EXPR
6903 || stmt_code == POINTER_PLUS_EXPR)
6905 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6906 op = gimple_assign_rhs2 (use->stmt);
6907 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6908 op = gimple_assign_rhs1 (use->stmt);
6909 else
6910 op = NULL_TREE;
6912 else
6913 op = NULL_TREE;
6915 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6916 return;
6919 comp = get_computation (data->current_loop, use, cand);
6920 gcc_assert (comp != NULL_TREE);
6922 switch (gimple_code (use->stmt))
6924 case GIMPLE_PHI:
6925 tgt = PHI_RESULT (use->stmt);
6927 /* If we should keep the biv, do not replace it. */
6928 if (name_info (data, tgt)->preserve_biv)
6929 return;
6931 bsi = gsi_after_labels (gimple_bb (use->stmt));
6932 break;
6934 case GIMPLE_ASSIGN:
6935 tgt = gimple_assign_lhs (use->stmt);
6936 bsi = gsi_for_stmt (use->stmt);
6937 break;
6939 default:
6940 gcc_unreachable ();
6943 if (!valid_gimple_rhs_p (comp)
6944 || (gimple_code (use->stmt) != GIMPLE_PHI
6945 /* We can't allow re-allocating the stmt as it might still
6946 be pointed to. */
6947 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6948 >= gimple_num_ops (gsi_stmt (bsi)))))
6950 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6951 true, GSI_SAME_STMT);
6952 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6954 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6955 /* As this isn't a plain copy we have to reset alignment
6956 information. */
6957 if (SSA_NAME_PTR_INFO (comp))
6958 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6962 if (gimple_code (use->stmt) == GIMPLE_PHI)
6964 ass = gimple_build_assign (tgt, comp);
6965 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6967 bsi = gsi_for_stmt (use->stmt);
6968 remove_phi_node (&bsi, false);
6970 else
6972 gimple_assign_set_rhs_from_tree (&bsi, comp);
6973 use->stmt = gsi_stmt (bsi);
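/* A made-up example of the rewrite above: if the value of the original
   iv is still needed, its definition

     i_3 = i_2 + 1;

   may be replaced by a computation based on the selected candidate, e.g.

     i_3 = (int) ivtmp_5;

   (iv candidates use unsigned types, hence the cast).  The IP_ORIGINAL
   special case at the top of the function instead keeps such an
   increment unchanged.  */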
6977 /* Performs a peephole optimization to reorder the iv update statement with
6978 a mem ref to enable instruction combining in later phases. The mem ref uses
6979 the iv value before the update, so the reordering transformation requires
6980 adjustment of the offset. CAND is the selected IV_CAND.
6982 Example:
6984 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6985 iv2 = iv1 + 1;
6987 if (t < val) (1)
6988 goto L;
6989 goto Head;
6992 Directly propagating t over to (1) will introduce an overlapping live range
6993 and thus increase register pressure. This peephole transforms it into:
6996 iv2 = iv1 + 1;
6997 t = MEM_REF (base, iv2, 8, 8);
6998 if (t < val)
6999 goto L;
7000 goto Head;
7003 static void
7004 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7006 tree var_after;
7007 gimple *iv_update, *stmt;
7008 basic_block bb;
7009 gimple_stmt_iterator gsi, gsi_iv;
7011 if (cand->pos != IP_NORMAL)
7012 return;
7014 var_after = cand->var_after;
7015 iv_update = SSA_NAME_DEF_STMT (var_after);
7017 bb = gimple_bb (iv_update);
7018 gsi = gsi_last_nondebug_bb (bb);
7019 stmt = gsi_stmt (gsi);
7021 /* Only handle conditional statements for now. */
7022 if (gimple_code (stmt) != GIMPLE_COND)
7023 return;
7025 gsi_prev_nondebug (&gsi);
7026 stmt = gsi_stmt (gsi);
7027 if (stmt != iv_update)
7028 return;
7030 gsi_prev_nondebug (&gsi);
7031 if (gsi_end_p (gsi))
7032 return;
7034 stmt = gsi_stmt (gsi);
7035 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7036 return;
7038 if (stmt != use->stmt)
7039 return;
7041 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7042 return;
7044 if (dump_file && (dump_flags & TDF_DETAILS))
7046 fprintf (dump_file, "Reordering \n");
7047 print_gimple_stmt (dump_file, iv_update, 0, 0);
7048 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7049 fprintf (dump_file, "\n");
7052 gsi = gsi_for_stmt (use->stmt);
7053 gsi_iv = gsi_for_stmt (iv_update);
7054 gsi_move_before (&gsi_iv, &gsi);
7056 cand->pos = IP_BEFORE_USE;
7057 cand->incremented_at = use->stmt;
7060 /* Rewrites USE (address that is an iv) using candidate CAND. */
7062 static void
7063 rewrite_use_address_1 (struct ivopts_data *data,
7064 struct iv_use *use, struct iv_cand *cand)
7066 aff_tree aff;
7067 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7068 tree base_hint = NULL_TREE;
7069 tree ref, iv;
7070 bool ok;
7072 adjust_iv_update_pos (cand, use);
7073 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7074 gcc_assert (ok);
7075 unshare_aff_combination (&aff);
7077 /* To avoid undefined overflow problems, all IV candidates use unsigned
7078 integer types. The drawback is that this makes it impossible for
7079 create_mem_ref to distinguish an IV that is based on a memory object
7080 from one that represents simply an offset.
7082 To work around this problem, we pass a hint to create_mem_ref that
7083 indicates which variable (if any) in aff is an IV based on a memory
7084 object. Note that we only consider the candidate. If this is not
7085 based on an object, the base of the reference is in some subexpression
7086 of the use -- but these will use pointer types, so they are recognized
7087 by the create_mem_ref heuristics anyway. */
7088 if (cand->iv->base_object)
7089 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7091 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7092 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7093 reference_alias_ptr_type (*use->op_p),
7094 iv, base_hint, data->speed);
7095 copy_ref_info (ref, *use->op_p);
7096 *use->op_p = ref;
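/* For instance (made-up names): for a use *p_7 with p_7 = base_3 + i_2 * 4
   and a selected unsigned candidate ivtmp_5 based on the object base_3,
   ivtmp_5 is passed as both IV and BASE_HINT, so create_mem_ref can build
   something like MEM[(int *) ivtmp_5] instead of treating ivtmp_5 as a
   mere offset.  */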
7099 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7100 first use of a group, rewrites sub uses in the group too. */
7102 static void
7103 rewrite_use_address (struct ivopts_data *data,
7104 struct iv_use *use, struct iv_cand *cand)
7106 struct iv_use *next;
7108 gcc_assert (use->sub_id == 0);
7109 rewrite_use_address_1 (data, use, cand);
7110 update_stmt (use->stmt);
7112 for (next = use->next; next != NULL; next = next->next)
7114 rewrite_use_address_1 (data, next, cand);
7115 update_stmt (next->stmt);
7118 return;
7121 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7122 candidate CAND. */
7124 static void
7125 rewrite_use_compare (struct ivopts_data *data,
7126 struct iv_use *use, struct iv_cand *cand)
7128 tree comp, *var_p, op, bound;
7129 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7130 enum tree_code compare;
7131 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7132 bool ok;
7134 bound = cp->value;
7135 if (bound)
7137 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7138 tree var_type = TREE_TYPE (var);
7139 gimple_seq stmts;
7141 if (dump_file && (dump_flags & TDF_DETAILS))
7143 fprintf (dump_file, "Replacing exit test: ");
7144 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7146 compare = cp->comp;
7147 bound = unshare_expr (fold_convert (var_type, bound));
7148 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7149 if (stmts)
7150 gsi_insert_seq_on_edge_immediate (
7151 loop_preheader_edge (data->current_loop),
7152 stmts);
7154 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7155 gimple_cond_set_lhs (cond_stmt, var);
7156 gimple_cond_set_code (cond_stmt, compare);
7157 gimple_cond_set_rhs (cond_stmt, op);
7158 return;
7161 /* The induction variable elimination failed; just express the original
7162 giv. */
7163 comp = get_computation (data->current_loop, use, cand);
7164 gcc_assert (comp != NULL_TREE);
7166 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7167 gcc_assert (ok);
7169 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7170 true, GSI_SAME_STMT);
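/* A made-up example of the exit-test replacement above: with a selected
   candidate ivtmp_5 and a precomputed BOUND, the test

     if (i_2 < n_4)

   may become

     if (ivtmp_5 != bound_6)

   where bound_6 is computed once in the loop preheader.  When no BOUND
   is available, the comparison operand is merely re-expressed in terms
   of the candidate.  */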
7173 /* Rewrites USE using candidate CAND. */
7175 static void
7176 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7178 switch (use->type)
7180 case USE_NONLINEAR_EXPR:
7181 rewrite_use_nonlinear_expr (data, use, cand);
7182 break;
7184 case USE_ADDRESS:
7185 rewrite_use_address (data, use, cand);
7186 break;
7188 case USE_COMPARE:
7189 rewrite_use_compare (data, use, cand);
7190 break;
7192 default:
7193 gcc_unreachable ();
7196 update_stmt (use->stmt);
7199 /* Rewrite the uses using the selected induction variables. */
7201 static void
7202 rewrite_uses (struct ivopts_data *data)
7204 unsigned i;
7205 struct iv_cand *cand;
7206 struct iv_use *use;
7208 for (i = 0; i < n_iv_uses (data); i++)
7210 use = iv_use (data, i);
7211 cand = use->selected;
7212 gcc_assert (cand);
7214 rewrite_use (data, use, cand);
7218 /* Removes the ivs that are not used after rewriting. */
7220 static void
7221 remove_unused_ivs (struct ivopts_data *data)
7223 unsigned j;
7224 bitmap_iterator bi;
7225 bitmap toremove = BITMAP_ALLOC (NULL);
7227 /* Figure out an order in which to release SSA DEFs so that we don't
7228 release something that we'd have to propagate into a debug stmt
7229 afterwards. */
7230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7232 struct version_info *info;
7234 info = ver_info (data, j);
7235 if (info->iv
7236 && !integer_zerop (info->iv->step)
7237 && !info->inv_id
7238 && !info->iv->have_use_for
7239 && !info->preserve_biv)
7241 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7243 tree def = info->iv->ssa_name;
7245 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7247 imm_use_iterator imm_iter;
7248 use_operand_p use_p;
7249 gimple *stmt;
7250 int count = 0;
7252 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7254 if (!gimple_debug_bind_p (stmt))
7255 continue;
7257 /* We just want to determine whether to do nothing
7258 (count == 0), to substitute the computed
7259 expression into a single use of the SSA DEF by
7260 itself (count == 1), or to use a debug temp
7261 because the SSA DEF is used multiple times or as
7262 part of a larger expression (count > 1). */
7263 count++;
7264 if (gimple_debug_bind_get_value (stmt) != def)
7265 count++;
7267 if (count > 1)
7268 BREAK_FROM_IMM_USE_STMT (imm_iter);
7271 if (!count)
7272 continue;
7274 struct iv_use dummy_use;
7275 struct iv_cand *best_cand = NULL, *cand;
7276 unsigned i, best_pref = 0, cand_pref;
7278 memset (&dummy_use, 0, sizeof (dummy_use));
7279 dummy_use.iv = info->iv;
7280 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7282 cand = iv_use (data, i)->selected;
7283 if (cand == best_cand)
7284 continue;
7285 cand_pref = operand_equal_p (cand->iv->step,
7286 info->iv->step, 0)
7287 ? 4 : 0;
7288 cand_pref
7289 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7290 == TYPE_MODE (TREE_TYPE (info->iv->base))
7291 ? 2 : 0;
7292 cand_pref
7293 += TREE_CODE (cand->iv->base) == INTEGER_CST
7294 ? 1 : 0;
7295 if (best_cand == NULL || best_pref < cand_pref)
7297 best_cand = cand;
7298 best_pref = cand_pref;
7302 if (!best_cand)
7303 continue;
7305 tree comp = get_computation_at (data->current_loop,
7306 &dummy_use, best_cand,
7307 SSA_NAME_DEF_STMT (def));
7308 if (!comp)
7309 continue;
7311 if (count > 1)
7313 tree vexpr = make_node (DEBUG_EXPR_DECL);
7314 DECL_ARTIFICIAL (vexpr) = 1;
7315 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7316 if (SSA_NAME_VAR (def))
7317 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7318 else
7319 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7320 gdebug *def_temp
7321 = gimple_build_debug_bind (vexpr, comp, NULL);
7322 gimple_stmt_iterator gsi;
7324 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7325 gsi = gsi_after_labels (gimple_bb
7326 (SSA_NAME_DEF_STMT (def)));
7327 else
7328 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7330 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7331 comp = vexpr;
7334 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7336 if (!gimple_debug_bind_p (stmt))
7337 continue;
7339 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7340 SET_USE (use_p, comp);
7342 update_stmt (stmt);
7348 release_defs_bitset (toremove);
7350 BITMAP_FREE (toremove);
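/* Debug-info sketch (made-up SSA names): if the removed iv i_3 is still
   referenced by several debug binds, the code above introduces a debug
   temporary

     # DEBUG D#1 => (int) ivtmp_5
     # DEBUG i => D#1

   so that the value of i remains computable in the debugger; with a
   single debug use, the computed expression is substituted directly.  */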
7353 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7354 for hash_map::traverse. */
7356 bool
7357 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7359 free (value);
7360 return true;
7363 /* Frees data allocated by the optimization of a single loop. */
7365 static void
7366 free_loop_data (struct ivopts_data *data)
7368 unsigned i, j;
7369 bitmap_iterator bi;
7370 tree obj;
7372 if (data->niters)
7374 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7375 delete data->niters;
7376 data->niters = NULL;
7379 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7381 struct version_info *info;
7383 info = ver_info (data, i);
7384 info->iv = NULL;
7385 info->has_nonlin_use = false;
7386 info->preserve_biv = false;
7387 info->inv_id = 0;
7389 bitmap_clear (data->relevant);
7390 bitmap_clear (data->important_candidates);
7392 for (i = 0; i < n_iv_uses (data); i++)
7394 struct iv_use *use = iv_use (data, i);
7395 struct iv_use *pre = use, *sub = use->next;
7397 while (sub)
7399 gcc_assert (sub->related_cands == NULL);
7400 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7402 pre = sub;
7403 sub = sub->next;
7404 free (pre);
7407 BITMAP_FREE (use->related_cands);
7408 for (j = 0; j < use->n_map_members; j++)
7409 if (use->cost_map[j].depends_on)
7410 BITMAP_FREE (use->cost_map[j].depends_on);
7411 free (use->cost_map);
7412 free (use);
7414 data->iv_uses.truncate (0);
7416 for (i = 0; i < n_iv_cands (data); i++)
7418 struct iv_cand *cand = iv_cand (data, i);
7420 if (cand->depends_on)
7421 BITMAP_FREE (cand->depends_on);
7422 free (cand);
7424 data->iv_candidates.truncate (0);
7426 if (data->version_info_size < num_ssa_names)
7428 data->version_info_size = 2 * num_ssa_names;
7429 free (data->version_info);
7430 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7433 data->max_inv_id = 0;
7435 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7436 SET_DECL_RTL (obj, NULL_RTX);
7438 decl_rtl_to_reset.truncate (0);
7440 data->inv_expr_tab->empty ();
7441 data->inv_expr_id = 0;
7444 /* Finalizes the data structures used by the iv optimization pass. */
7447 static void
7448 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7450 free_loop_data (data);
7451 free (data->version_info);
7452 BITMAP_FREE (data->relevant);
7453 BITMAP_FREE (data->important_candidates);
7455 decl_rtl_to_reset.release ();
7456 data->iv_uses.release ();
7457 data->iv_candidates.release ();
7458 delete data->inv_expr_tab;
7459 data->inv_expr_tab = NULL;
7460 free_affine_expand_cache (&data->name_expansion_cache);
7461 obstack_free (&data->iv_obstack, NULL);
7464 /* Returns true if the loop body BODY includes any function calls. */
7466 static bool
7467 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7469 gimple_stmt_iterator gsi;
7470 unsigned i;
7472 for (i = 0; i < num_nodes; i++)
7473 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7475 gimple *stmt = gsi_stmt (gsi);
7476 if (is_gimple_call (stmt)
7477 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7478 return true;
7480 return false;
7483 /* Optimizes the LOOP. Returns true if anything changed. */
7485 static bool
7486 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7488 bool changed = false;
7489 struct iv_ca *iv_ca;
7490 edge exit = single_dom_exit (loop);
7491 basic_block *body;
7493 gcc_assert (!data->niters);
7494 data->current_loop = loop;
7495 data->loop_loc = find_loop_location (loop);
7496 data->speed = optimize_loop_for_speed_p (loop);
7498 if (dump_file && (dump_flags & TDF_DETAILS))
7500 fprintf (dump_file, "Processing loop %d", loop->num);
7501 if (data->loop_loc != UNKNOWN_LOCATION)
7502 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7503 LOCATION_LINE (data->loop_loc));
7504 fprintf (dump_file, "\n");
7506 if (exit)
7508 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7509 exit->src->index, exit->dest->index);
7510 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7511 fprintf (dump_file, "\n");
7514 fprintf (dump_file, "\n");
7517 body = get_loop_body (loop);
7518 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7519 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7520 free (body);
7522 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7524 /* For each ssa name, determine whether it behaves as an induction
7525 variable in some loop. */
7526 if (!find_induction_variables (data))
7527 goto finish;
7529 /* Finds interesting uses (item 1). */
7530 find_interesting_uses (data);
7531 group_address_uses (data);
7532 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7533 goto finish;
7535 /* Finds candidates for the induction variables (item 2). */
7536 find_iv_candidates (data);
7538 /* Calculates the costs (item 3, part 1). */
7539 determine_iv_costs (data);
7540 determine_use_iv_costs (data);
7541 determine_set_costs (data);
7543 /* Find the optimal set of induction variables (item 3, part 2). */
7544 iv_ca = find_optimal_iv_set (data);
7545 if (!iv_ca)
7546 goto finish;
7547 changed = true;
7549 /* Create the new induction variables (item 4, part 1). */
7550 create_new_ivs (data, iv_ca);
7551 iv_ca_free (&iv_ca);
7553 /* Rewrite the uses (item 4, part 2). */
7554 rewrite_uses (data);
7556 /* Remove the ivs that are unused after rewriting. */
7557 remove_unused_ivs (data);
7559 /* We have changed the structure of induction variables; it might happen
7560 that definitions in the scev database refer to some of them that were
7561 eliminated. */
7562 scev_reset ();
7564 finish:
7565 free_loop_data (data);
7567 return changed;
7570 /* Main entry point. Optimizes induction variables in loops. */
7572 void
7573 tree_ssa_iv_optimize (void)
7575 struct loop *loop;
7576 struct ivopts_data data;
7578 tree_ssa_iv_optimize_init (&data);
7580 /* Optimize the loops starting with the innermost ones. */
7581 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7583 if (dump_file && (dump_flags & TDF_DETAILS))
7584 flow_loop_dump (loop, dump_file, NULL, 1);
7586 tree_ssa_iv_optimize_loop (&data, loop);
7589 tree_ssa_iv_optimize_finalize (&data);