gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "stor-layout.h"
  70 #include "tm_p.h"
  71 #include "predict.h"
  72 #include "vec.h"
  73 #include "hashtab.h"
  74 #include "hash-set.h"
  75 #include "machmode.h"
  76 #include "hard-reg-set.h"
  77 #include "input.h"
  78 #include "function.h"
  79 #include "dominance.h"
  80 #include "cfg.h"
  81 #include "basic-block.h"
  82 #include "gimple-pretty-print.h"
  83 #include "hash-map.h"
  84 #include "hash-table.h"
  85 #include "tree-ssa-alias.h"
  86 #include "internal-fn.h"
  87 #include "tree-eh.h"
  88 #include "gimple-expr.h"
  89 #include "is-a.h"
  90 #include "gimple.h"
  91 #include "gimplify.h"
  92 #include "gimple-iterator.h"
  93 #include "gimplify-me.h"
  94 #include "gimple-ssa.h"
  95 #include "plugin-api.h"
  96 #include "ipa-ref.h"
  97 #include "cgraph.h"
  98 #include "tree-cfg.h"
  99 #include "tree-phinodes.h"
 100 #include "ssa-iterators.h"
 101 #include "stringpool.h"
 102 #include "tree-ssanames.h"
 103 #include "tree-ssa-loop-ivopts.h"
 104 #include "tree-ssa-loop-manip.h"
 105 #include "tree-ssa-loop-niter.h"
 106 #include "tree-ssa-loop.h"
 107 #include "expr.h"
 108 #include "tree-dfa.h"
 109 #include "tree-ssa.h"
 110 #include "cfgloop.h"
 111 #include "tree-pass.h"
 112 #include "insn-config.h"
 113 #include "tree-chrec.h"
 114 #include "tree-scalar-evolution.h"
 115 #include "cfgloop.h"
 116 #include "params.h"
 117 #include "langhooks.h"
 118 #include "tree-affine.h"
 119 #include "target.h"
 120 #include "tree-inline.h"
 121 #include "tree-ssa-propagate.h"
 122 #include "expmed.h"
 123 #include "tree-ssa-address.h"
 124 #include "builtins.h"
 125
 126 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 127    cost of different addressing modes.  This should be moved to a TBD
 128    interface between the GIMPLE and RTL worlds.  */
 129 #include "expr.h"
 130 #include "recog.h"
 131
 132 /* The infinite cost.  */
 133 #define INFTY 10000000
 134
 135 #define AVG_LOOP_NITER(LOOP) 5
 136
 137 /* Returns the expected number of loop iterations for LOOP.
 138    The average trip count is computed from profile data if it
 139    exists. */
 140
 141 static inline HOST_WIDE_INT
 142 avg_loop_niter (struct loop *loop)
 143 {
 144   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 145   if (niter == -1)
 146     return AVG_LOOP_NITER (loop);
 147
 148   return niter;
 149 }
 150
 151 /* Representation of the induction variable.  */
 152 struct iv
 153 {
 154   tree base;            /* Initial value of the iv.  */
 155   tree base_object;     /* A memory object to that the induction variable points.  */
 156   tree step;            /* Step of the iv (constant only).  */
 157   tree ssa_name;        /* The ssa name with the value.  */
 158   bool biv_p;           /* Is it a biv?  */
 159   bool have_use_for;    /* Do we already have a use for it?  */
 160   unsigned use_id;      /* The identifier in the use if it is the case.  */
 161 };
 162
 163 /* Per-ssa version information (induction variable descriptions, etc.).  */
 164 struct version_info
 165 {
 166   tree name;            /* The ssa name.  */
 167   struct iv *iv;        /* Induction variable description.  */
 168   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 169                            an expression that is not an induction variable.  */
 170   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 171   unsigned inv_id;      /* Id of an invariant.  */
 172 };
 173
 174 /* Types of uses.  */
 175 enum use_type
 176 {
 177   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 178   USE_ADDRESS,          /* Use in an address.  */
 179   USE_COMPARE           /* Use is a compare.  */
 180 };
 181
 182 /* Cost of a computation.  */
 183 typedef struct
 184 {
 185   int cost;             /* The runtime cost.  */
 186   unsigned complexity;  /* The estimate of the complexity of the code for
 187                            the computation (in no concrete units --
 188                            complexity field should be larger for more
 189                            complex expressions and addressing modes).  */
 190 } comp_cost;
 191
 192 static const comp_cost no_cost = {0, 0};
 193 static const comp_cost infinite_cost = {INFTY, INFTY};
 194
 195 /* The candidate - cost pair.  */
 196 struct cost_pair
 197 {
 198   struct iv_cand *cand; /* The candidate.  */
 199   comp_cost cost;       /* The cost.  */
 200   bitmap depends_on;    /* The list of invariants that have to be
 201                            preserved.  */
 202   tree value;           /* For final value elimination, the expression for
 203                            the final value of the iv.  For iv elimination,
 204                            the new bound to compare with.  */
 205   enum tree_code comp;  /* For iv elimination, the comparison.  */
 206   int inv_expr_id;      /* Loop invariant expression id.  */
 207 };
 208
 209 /* Use.  */
 210 struct iv_use
 211 {
 212   unsigned id;          /* The id of the use.  */
 213   enum use_type type;   /* Type of the use.  */
 214   struct iv *iv;        /* The induction variable it is based on.  */
 215   gimple stmt;          /* Statement in that it occurs.  */
 216   tree *op_p;           /* The place where it occurs.  */
 217   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 218                            important ones.  */
 219
 220   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 221   struct cost_pair *cost_map;
 222                         /* The costs wrto the iv candidates.  */
 223
 224   struct iv_cand *selected;
 225                         /* The selected candidate.  */
 226 };
 227
 228 /* The position where the iv is computed.  */
 229 enum iv_position
 230 {
 231   IP_NORMAL,            /* At the end, just before the exit condition.  */
 232   IP_END,               /* At the end of the latch block.  */
 233   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 234   IP_AFTER_USE,         /* Immediately after a specific use.  */
 235   IP_ORIGINAL           /* The original biv.  */
 236 };
 237
 238 /* The induction variable candidate.  */
 239 struct iv_cand
 240 {
 241   unsigned id;          /* The number of the candidate.  */
 242   bool important;       /* Whether this is an "important" candidate, i.e. such
 243                            that it should be considered by all uses.  */
 244   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 245   gimple incremented_at;/* For original biv, the statement where it is
 246                            incremented.  */
 247   tree var_before;      /* The variable used for it before increment.  */
 248   tree var_after;       /* The variable used for it after increment.  */
 249   struct iv *iv;        /* The value of the candidate.  NULL for
 250                            "pseudocandidate" used to indicate the possibility
 251                            to replace the final value of an iv by direct
 252                            computation of the value.  */
 253   unsigned cost;        /* Cost of the candidate.  */
 254   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 255   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 256                               where it is incremented.  */
 257   bitmap depends_on;    /* The list of invariants that are used in step of the
 258                            biv.  */
 259 };
 260
 261 /* Loop invariant expression hashtable entry.  */
 262 struct iv_inv_expr_ent
 263 {
 264   tree expr;
 265   int id;
 266   hashval_t hash;
 267 };
 268
 269 /* The data used by the induction variable optimizations.  */
 270
 271 typedef struct iv_use *iv_use_p;
 272
 273 typedef struct iv_cand *iv_cand_p;
 274
 275 /* Hashtable helpers.  */
 276
 277 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 278 {
 279   typedef iv_inv_expr_ent value_type;
 280   typedef iv_inv_expr_ent compare_type;
 281   static inline hashval_t hash (const value_type *);
 282   static inline bool equal (const value_type *, const compare_type *);
 283 };
 284
 285 /* Hash function for loop invariant expressions.  */
 286
 287 inline hashval_t
 288 iv_inv_expr_hasher::hash (const value_type *expr)
 289 {
 290   return expr->hash;
 291 }
 292
 293 /* Hash table equality function for expressions.  */
 294
 295 inline bool
 296 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 297 {
 298   return expr1->hash == expr2->hash
 299          && operand_equal_p (expr1->expr, expr2->expr, 0);
 300 }
 301
 302 struct ivopts_data
 303 {
 304   /* The currently optimized loop.  */
 305   struct loop *current_loop;
 306
 307   /* Numbers of iterations for all exits of the current loop.  */
 308   hash_map<edge, tree_niter_desc *> *niters;
 309
 310   /* Number of registers used in it.  */
 311   unsigned regs_used;
 312
 313   /* The size of version_info array allocated.  */
 314   unsigned version_info_size;
 315
 316   /* The array of information for the ssa names.  */
 317   struct version_info *version_info;
 318
 319   /* The hashtable of loop invariant expressions created
 320      by ivopt.  */
 321   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 322
 323   /* Loop invariant expression id.  */
 324   int inv_expr_id;
 325
 326   /* The bitmap of indices in version_info whose value was changed.  */
 327   bitmap relevant;
 328
 329   /* The uses of induction variables.  */
 330   vec<iv_use_p> iv_uses;
 331
 332   /* The candidates.  */
 333   vec<iv_cand_p> iv_candidates;
 334
 335   /* A bitmap of important candidates.  */
 336   bitmap important_candidates;
 337
 338   /* Cache used by tree_to_aff_combination_expand.  */
 339   hash_map<tree, name_expansion *> *name_expansion_cache;
 340
 341   /* The maximum invariant id.  */
 342   unsigned max_inv_id;
 343
 344   /* Whether to consider just related and important candidates when replacing a
 345      use.  */
 346   bool consider_all_candidates;
 347
 348   /* Are we optimizing for speed?  */
 349   bool speed;
 350
 351   /* Whether the loop body includes any function calls.  */
 352   bool body_includes_call;
 353
 354   /* Whether the loop body can only be exited via single exit.  */
 355   bool loop_single_exit_p;
 356 };
 357
 358 /* An assignment of iv candidates to uses.  */
 359
 360 struct iv_ca
 361 {
 362   /* The number of uses covered by the assignment.  */
 363   unsigned upto;
 364
 365   /* Number of uses that cannot be expressed by the candidates in the set.  */
 366   unsigned bad_uses;
 367
 368   /* Candidate assigned to a use, together with the related costs.  */
 369   struct cost_pair **cand_for_use;
 370
 371   /* Number of times each candidate is used.  */
 372   unsigned *n_cand_uses;
 373
 374   /* The candidates used.  */
 375   bitmap cands;
 376
 377   /* The number of candidates in the set.  */
 378   unsigned n_cands;
 379
 380   /* Total number of registers needed.  */
 381   unsigned n_regs;
 382
 383   /* Total cost of expressing uses.  */
 384   comp_cost cand_use_cost;
 385
 386   /* Total cost of candidates.  */
 387   unsigned cand_cost;
 388
 389   /* Number of times each invariant is used.  */
 390   unsigned *n_invariant_uses;
 391
 392   /* The array holding the number of uses of each loop
 393      invariant expressions created by ivopt.  */
 394   unsigned *used_inv_expr;
 395
 396   /* The number of created loop invariants.  */
 397   unsigned num_used_inv_expr;
 398
 399   /* Total cost of the assignment.  */
 400   comp_cost cost;
 401 };
 402
 403 /* Difference of two iv candidate assignments.  */
 404
 405 struct iv_ca_delta
 406 {
 407   /* Changed use.  */
 408   struct iv_use *use;
 409
 410   /* An old assignment (for rollback purposes).  */
 411   struct cost_pair *old_cp;
 412
 413   /* A new assignment.  */
 414   struct cost_pair *new_cp;
 415
 416   /* Next change in the list.  */
 417   struct iv_ca_delta *next_change;
 418 };
 419
 420 /* Bound on number of candidates below that all candidates are considered.  */
 421
 422 #define CONSIDER_ALL_CANDIDATES_BOUND \
 423   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 424
 425 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 426    optimizing such a loop would help, and it would take ages).  */
 427
 428 #define MAX_CONSIDERED_USES \
 429   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 430
 431 /* If there are at most this number of ivs in the set, try removing unnecessary
 432    ivs from the set always.  */
 433
 434 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 435   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 436
 437 /* The list of trees for that the decl_rtl field must be reset is stored
 438    here.  */
 439
 440 static vec<tree> decl_rtl_to_reset;
 441
 442 static comp_cost force_expr_to_var_cost (tree, bool);
 443
 444 /* Number of uses recorded in DATA.  */
 445
 446 static inline unsigned
 447 n_iv_uses (struct ivopts_data *data)
 448 {
 449   return data->iv_uses.length ();
 450 }
 451
 452 /* Ith use recorded in DATA.  */
 453
 454 static inline struct iv_use *
 455 iv_use (struct ivopts_data *data, unsigned i)
 456 {
 457   return data->iv_uses[i];
 458 }
 459
 460 /* Number of candidates recorded in DATA.  */
 461
 462 static inline unsigned
 463 n_iv_cands (struct ivopts_data *data)
 464 {
 465   return data->iv_candidates.length ();
 466 }
 467
 468 /* Ith candidate recorded in DATA.  */
 469
 470 static inline struct iv_cand *
 471 iv_cand (struct ivopts_data *data, unsigned i)
 472 {
 473   return data->iv_candidates[i];
 474 }
 475
 476 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 477
 478 edge
 479 single_dom_exit (struct loop *loop)
 480 {
 481   edge exit = single_exit (loop);
 482
 483   if (!exit)
 484     return NULL;
 485
 486   if (!just_once_each_iteration_p (loop, exit->src))
 487     return NULL;
 488
 489   return exit;
 490 }
 491
 492 /* Dumps information about the induction variable IV to FILE.  */
 493
 494 void
 495 dump_iv (FILE *file, struct iv *iv)
 496 {
 497   if (iv->ssa_name)
 498     {
 499       fprintf (file, "ssa name ");
 500       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 501       fprintf (file, "\n");
 502     }
 503
 504   fprintf (file, "  type ");
 505   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 506   fprintf (file, "\n");
 507
 508   if (iv->step)
 509     {
 510       fprintf (file, "  base ");
 511       print_generic_expr (file, iv->base, TDF_SLIM);
 512       fprintf (file, "\n");
 513
 514       fprintf (file, "  step ");
 515       print_generic_expr (file, iv->step, TDF_SLIM);
 516       fprintf (file, "\n");
 517     }
 518   else
 519     {
 520       fprintf (file, "  invariant ");
 521       print_generic_expr (file, iv->base, TDF_SLIM);
 522       fprintf (file, "\n");
 523     }
 524
 525   if (iv->base_object)
 526     {
 527       fprintf (file, "  base object ");
 528       print_generic_expr (file, iv->base_object, TDF_SLIM);
 529       fprintf (file, "\n");
 530     }
 531
 532   if (iv->biv_p)
 533     fprintf (file, "  is a biv\n");
 534 }
 535
 536 /* Dumps information about the USE to FILE.  */
 537
 538 void
 539 dump_use (FILE *file, struct iv_use *use)
 540 {
 541   fprintf (file, "use %d\n", use->id);
 542
 543   switch (use->type)
 544     {
 545     case USE_NONLINEAR_EXPR:
 546       fprintf (file, "  generic\n");
 547       break;
 548
 549     case USE_ADDRESS:
 550       fprintf (file, "  address\n");
 551       break;
 552
 553     case USE_COMPARE:
 554       fprintf (file, "  compare\n");
 555       break;
 556
 557     default:
 558       gcc_unreachable ();
 559     }
 560
 561   fprintf (file, "  in statement ");
 562   print_gimple_stmt (file, use->stmt, 0, 0);
 563   fprintf (file, "\n");
 564
 565   fprintf (file, "  at position ");
 566   if (use->op_p)
 567     print_generic_expr (file, *use->op_p, TDF_SLIM);
 568   fprintf (file, "\n");
 569
 570   dump_iv (file, use->iv);
 571
 572   if (use->related_cands)
 573     {
 574       fprintf (file, "  related candidates ");
 575       dump_bitmap (file, use->related_cands);
 576     }
 577 }
 578
 579 /* Dumps information about the uses to FILE.  */
 580
 581 void
 582 dump_uses (FILE *file, struct ivopts_data *data)
 583 {
 584   unsigned i;
 585   struct iv_use *use;
 586
 587   for (i = 0; i < n_iv_uses (data); i++)
 588     {
 589       use = iv_use (data, i);
 590
 591       dump_use (file, use);
 592       fprintf (file, "\n");
 593     }
 594 }
 595
 596 /* Dumps information about induction variable candidate CAND to FILE.  */
 597
 598 void
 599 dump_cand (FILE *file, struct iv_cand *cand)
 600 {
 601   struct iv *iv = cand->iv;
 602
 603   fprintf (file, "candidate %d%s\n",
 604            cand->id, cand->important ? " (important)" : "");
 605
 606   if (cand->depends_on)
 607     {
 608       fprintf (file, "  depends on ");
 609       dump_bitmap (file, cand->depends_on);
 610     }
 611
 612   if (!iv)
 613     {
 614       fprintf (file, "  final value replacement\n");
 615       return;
 616     }
 617
 618   if (cand->var_before)
 619     {
 620       fprintf (file, "  var_before ");
 621       print_generic_expr (file, cand->var_before, TDF_SLIM);
 622       fprintf (file, "\n");
 623     }
 624   if (cand->var_after)
 625     {
 626       fprintf (file, "  var_after ");
 627       print_generic_expr (file, cand->var_after, TDF_SLIM);
 628       fprintf (file, "\n");
 629     }
 630
 631   switch (cand->pos)
 632     {
 633     case IP_NORMAL:
 634       fprintf (file, "  incremented before exit test\n");
 635       break;
 636
 637     case IP_BEFORE_USE:
 638       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 639       break;
 640
 641     case IP_AFTER_USE:
 642       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 643       break;
 644
 645     case IP_END:
 646       fprintf (file, "  incremented at end\n");
 647       break;
 648
 649     case IP_ORIGINAL:
 650       fprintf (file, "  original biv\n");
 651       break;
 652     }
 653
 654   dump_iv (file, iv);
 655 }
 656
 657 /* Returns the info for ssa version VER.  */
 658
 659 static inline struct version_info *
 660 ver_info (struct ivopts_data *data, unsigned ver)
 661 {
 662   return data->version_info + ver;
 663 }
 664
 665 /* Returns the info for ssa name NAME.  */
 666
 667 static inline struct version_info *
 668 name_info (struct ivopts_data *data, tree name)
 669 {
 670   return ver_info (data, SSA_NAME_VERSION (name));
 671 }
 672
 673 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 674    emitted in LOOP.  */
 675
 676 static bool
 677 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 678 {
 679   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 680
 681   gcc_assert (bb);
 682
 683   if (sbb == loop->latch)
 684     return true;
 685
 686   if (sbb != bb)
 687     return false;
 688
 689   return stmt == last_stmt (bb);
 690 }
 691
 692 /* Returns true if STMT if after the place where the original induction
 693    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 694    if the positions are identical.  */
 695
 696 static bool
 697 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 698 {
 699   basic_block cand_bb = gimple_bb (cand->incremented_at);
 700   basic_block stmt_bb = gimple_bb (stmt);
 701
 702   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 703     return false;
 704
 705   if (stmt_bb != cand_bb)
 706     return true;
 707
 708   if (true_if_equal
 709       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 710     return true;
 711   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 712 }
 713
 714 /* Returns true if STMT if after the place where the induction variable
 715    CAND is incremented in LOOP.  */
 716
 717 static bool
 718 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 719 {
 720   switch (cand->pos)
 721     {
 722     case IP_END:
 723       return false;
 724
 725     case IP_NORMAL:
 726       return stmt_after_ip_normal_pos (loop, stmt);
 727
 728     case IP_ORIGINAL:
 729     case IP_AFTER_USE:
 730       return stmt_after_inc_pos (cand, stmt, false);
 731
 732     case IP_BEFORE_USE:
 733       return stmt_after_inc_pos (cand, stmt, true);
 734
 735     default:
 736       gcc_unreachable ();
 737     }
 738 }
 739
 740 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 741
 742 static bool
 743 abnormal_ssa_name_p (tree exp)
 744 {
 745   if (!exp)
 746     return false;
 747
 748   if (TREE_CODE (exp) != SSA_NAME)
 749     return false;
 750
 751   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 752 }
 753
 754 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 755    abnormal phi node.  Callback for for_each_index.  */
 756
 757 static bool
 758 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 759                                   void *data ATTRIBUTE_UNUSED)
 760 {
 761   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 762     {
 763       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 764         return false;
 765       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 766         return false;
 767     }
 768
 769   return !abnormal_ssa_name_p (*index);
 770 }
 771
 772 /* Returns true if EXPR contains a ssa name that occurs in an
 773    abnormal phi node.  */
 774
 775 bool
 776 contains_abnormal_ssa_name_p (tree expr)
 777 {
 778   enum tree_code code;
 779   enum tree_code_class codeclass;
 780
 781   if (!expr)
 782     return false;
 783
 784   code = TREE_CODE (expr);
 785   codeclass = TREE_CODE_CLASS (code);
 786
 787   if (code == SSA_NAME)
 788     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 789
 790   if (code == INTEGER_CST
 791       || is_gimple_min_invariant (expr))
 792     return false;
 793
 794   if (code == ADDR_EXPR)
 795     return !for_each_index (&TREE_OPERAND (expr, 0),
 796                             idx_contains_abnormal_ssa_name_p,
 797                             NULL);
 798
 799   if (code == COND_EXPR)
 800     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 801       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 802       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 803
 804   switch (codeclass)
 805     {
 806     case tcc_binary:
 807     case tcc_comparison:
 808       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 809         return true;
 810
 811       /* Fallthru.  */
 812     case tcc_unary:
 813       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 814         return true;
 815
 816       break;
 817
 818     default:
 819       gcc_unreachable ();
 820     }
 821
 822   return false;
 823 }
 824
 825 /*  Returns the structure describing number of iterations determined from
 826     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 827
 828 static struct tree_niter_desc *
 829 niter_for_exit (struct ivopts_data *data, edge exit)
 830 {
 831   struct tree_niter_desc *desc;
 832   tree_niter_desc **slot;
 833
 834   if (!data->niters)
 835     {
 836       data->niters = new hash_map<edge, tree_niter_desc *>;
 837       slot = NULL;
 838     }
 839   else
 840     slot = data->niters->get (exit);
 841
 842   if (!slot)
 843     {
 844       /* Try to determine number of iterations.  We cannot safely work with ssa
 845          names that appear in phi nodes on abnormal edges, so that we do not
 846          create overlapping life ranges for them (PR 27283).  */
 847       desc = XNEW (struct tree_niter_desc);
 848       if (!number_of_iterations_exit (data->current_loop,
 849                                       exit, desc, true)
 850           || contains_abnormal_ssa_name_p (desc->niter))
 851         {
 852           XDELETE (desc);
 853           desc = NULL;
 854         }
 855       data->niters->put (exit, desc);
 856     }
 857   else
 858     desc = *slot;
 859
 860   return desc;
 861 }
 862
 863 /* Returns the structure describing number of iterations determined from
 864    single dominating exit of DATA->current_loop, or NULL if something
 865    goes wrong.  */
 866
 867 static struct tree_niter_desc *
 868 niter_for_single_dom_exit (struct ivopts_data *data)
 869 {
 870   edge exit = single_dom_exit (data->current_loop);
 871
 872   if (!exit)
 873     return NULL;
 874
 875   return niter_for_exit (data, exit);
 876 }
 877
 878 /* Initializes data structures used by the iv optimization pass, stored
 879    in DATA.  */
 880
 881 static void
 882 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 883 {
 884   data->version_info_size = 2 * num_ssa_names;
 885   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 886   data->relevant = BITMAP_ALLOC (NULL);
 887   data->important_candidates = BITMAP_ALLOC (NULL);
 888   data->max_inv_id = 0;
 889   data->niters = NULL;
 890   data->iv_uses.create (20);
 891   data->iv_candidates.create (20);
 892   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 893   data->inv_expr_id = 0;
 894   data->name_expansion_cache = NULL;
 895   decl_rtl_to_reset.create (20);
 896 }
 897
 898 /* Returns a memory object to that EXPR points.  In case we are able to
 899    determine that it does not point to any such object, NULL is returned.  */
 900
 901 static tree
 902 determine_base_object (tree expr)
 903 {
 904   enum tree_code code = TREE_CODE (expr);
 905   tree base, obj;
 906
 907   /* If this is a pointer casted to any type, we need to determine
 908      the base object for the pointer; so handle conversions before
 909      throwing away non-pointer expressions.  */
 910   if (CONVERT_EXPR_P (expr))
 911     return determine_base_object (TREE_OPERAND (expr, 0));
 912
 913   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 914     return NULL_TREE;
 915
 916   switch (code)
 917     {
 918     case INTEGER_CST:
 919       return NULL_TREE;
 920
 921     case ADDR_EXPR:
 922       obj = TREE_OPERAND (expr, 0);
 923       base = get_base_address (obj);
 924
 925       if (!base)
 926         return expr;
 927
 928       if (TREE_CODE (base) == MEM_REF)
 929         return determine_base_object (TREE_OPERAND (base, 0));
 930
 931       return fold_convert (ptr_type_node,
 932                            build_fold_addr_expr (base));
 933
 934     case POINTER_PLUS_EXPR:
 935       return determine_base_object (TREE_OPERAND (expr, 0));
 936
 937     case PLUS_EXPR:
 938     case MINUS_EXPR:
 939       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 940       gcc_unreachable ();
 941
 942     default:
 943       return fold_convert (ptr_type_node, expr);
 944     }
 945 }
 946
 947 /* Return true if address expression with non-DECL_P operand appears
 948    in EXPR.  */
 949
 950 static bool
 951 contain_complex_addr_expr (tree expr)
 952 {
 953   bool res = false;
 954
 955   STRIP_NOPS (expr);
 956   switch (TREE_CODE (expr))
 957     {
 958     case POINTER_PLUS_EXPR:
 959     case PLUS_EXPR:
 960     case MINUS_EXPR:
 961       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 962       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 963       break;
 964
 965     case ADDR_EXPR:
 966       return (!DECL_P (TREE_OPERAND (expr, 0)));
 967
 968     default:
 969       return false;
 970     }
 971
 972   return res;
 973 }
 974
 975 /* Allocates an induction variable with given initial value BASE and step STEP
 976    for loop LOOP.  */
 977
 978 static struct iv *
 979 alloc_iv (tree base, tree step)
 980 {
 981   tree expr = base;
 982   struct iv *iv = XCNEW (struct iv);
 983   gcc_assert (step != NULL_TREE);
 984
 985   /* Lower address expression in base except ones with DECL_P as operand.
 986      By doing this:
 987        1) More accurate cost can be computed for address expressions;
 988        2) Duplicate candidates won't be created for bases in different
 989           forms, like &a[0] and &a.  */
 990   STRIP_NOPS (expr);
 991   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
 992       || contain_complex_addr_expr (expr))
 993     {
 994       aff_tree comb;
 995       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
 996       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 997     }
 998
 999   iv->base = base;
1000   iv->base_object = determine_base_object (base);
1001   iv->step = step;
1002   iv->biv_p = false;
1003   iv->have_use_for = false;
1004   iv->use_id = 0;
1005   iv->ssa_name = NULL_TREE;
1006
1007   return iv;
1008 }
1009
1010 /* Sets STEP and BASE for induction variable IV.  */
1011
1012 static void
1013 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1014 {
1015   struct version_info *info = name_info (data, iv);
1016
1017   gcc_assert (!info->iv);
1018
1019   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1020   info->iv = alloc_iv (base, step);
1021   info->iv->ssa_name = iv;
1022 }
1023
1024 /* Finds induction variable declaration for VAR.  */
1025
1026 static struct iv *
1027 get_iv (struct ivopts_data *data, tree var)
1028 {
1029   basic_block bb;
1030   tree type = TREE_TYPE (var);
1031
1032   if (!POINTER_TYPE_P (type)
1033       && !INTEGRAL_TYPE_P (type))
1034     return NULL;
1035
1036   if (!name_info (data, var)->iv)
1037     {
1038       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1039
1040       if (!bb
1041           || !flow_bb_inside_loop_p (data->current_loop, bb))
1042         set_iv (data, var, var, build_int_cst (type, 0));
1043     }
1044
1045   return name_info (data, var)->iv;
1046 }
1047
1048 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1049    not define a simple affine biv with nonzero step.  */
1050
1051 static tree
1052 determine_biv_step (gphi *phi)
1053 {
1054   struct loop *loop = gimple_bb (phi)->loop_father;
1055   tree name = PHI_RESULT (phi);
1056   affine_iv iv;
1057
1058   if (virtual_operand_p (name))
1059     return NULL_TREE;
1060
1061   if (!simple_iv (loop, loop, name, &iv, true))
1062     return NULL_TREE;
1063
1064   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1065 }
1066
1067 /* Finds basic ivs.  */
1068
1069 static bool
1070 find_bivs (struct ivopts_data *data)
1071 {
1072   gphi *phi;
1073   tree step, type, base;
1074   bool found = false;
1075   struct loop *loop = data->current_loop;
1076   gphi_iterator psi;
1077
1078   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1079     {
1080       phi = psi.phi ();
1081
1082       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1083         continue;
1084
1085       step = determine_biv_step (phi);
1086       if (!step)
1087         continue;
1088
1089       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1090       base = expand_simple_operations (base);
1091       if (contains_abnormal_ssa_name_p (base)
1092           || contains_abnormal_ssa_name_p (step))
1093         continue;
1094
1095       type = TREE_TYPE (PHI_RESULT (phi));
1096       base = fold_convert (type, base);
1097       if (step)
1098         {
1099           if (POINTER_TYPE_P (type))
1100             step = convert_to_ptrofftype (step);
1101           else
1102             step = fold_convert (type, step);
1103         }
1104
1105       set_iv (data, PHI_RESULT (phi), base, step);
1106       found = true;
1107     }
1108
1109   return found;
1110 }
1111
1112 /* Marks basic ivs.  */
1113
1114 static void
1115 mark_bivs (struct ivopts_data *data)
1116 {
1117   gphi *phi;
1118   gimple def;
1119   tree var;
1120   struct iv *iv, *incr_iv;
1121   struct loop *loop = data->current_loop;
1122   basic_block incr_bb;
1123   gphi_iterator psi;
1124
1125   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1126     {
1127       phi = psi.phi ();
1128
1129       iv = get_iv (data, PHI_RESULT (phi));
1130       if (!iv)
1131         continue;
1132
1133       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1134       def = SSA_NAME_DEF_STMT (var);
1135       /* Don't mark iv peeled from other one as biv.  */
1136       if (def
1137           && gimple_code (def) == GIMPLE_PHI
1138           && gimple_bb (def) == loop->header)
1139         continue;
1140
1141       incr_iv = get_iv (data, var);
1142       if (!incr_iv)
1143         continue;
1144
1145       /* If the increment is in the subloop, ignore it.  */
1146       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1147       if (incr_bb->loop_father != data->current_loop
1148           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1149         continue;
1150
1151       iv->biv_p = true;
1152       incr_iv->biv_p = true;
1153     }
1154 }
1155
1156 /* Checks whether STMT defines a linear induction variable and stores its
1157    parameters to IV.  */
1158
1159 static bool
1160 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1161 {
1162   tree lhs;
1163   struct loop *loop = data->current_loop;
1164
1165   iv->base = NULL_TREE;
1166   iv->step = NULL_TREE;
1167
1168   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1169     return false;
1170
1171   lhs = gimple_assign_lhs (stmt);
1172   if (TREE_CODE (lhs) != SSA_NAME)
1173     return false;
1174
1175   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1176     return false;
1177   iv->base = expand_simple_operations (iv->base);
1178
1179   if (contains_abnormal_ssa_name_p (iv->base)
1180       || contains_abnormal_ssa_name_p (iv->step))
1181     return false;
1182
1183   /* If STMT could throw, then do not consider STMT as defining a GIV.
1184      While this will suppress optimizations, we can not safely delete this
1185      GIV and associated statements, even if it appears it is not used.  */
1186   if (stmt_could_throw_p (stmt))
1187     return false;
1188
1189   return true;
1190 }
1191
1192 /* Finds general ivs in statement STMT.  */
1193
1194 static void
1195 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1196 {
1197   affine_iv iv;
1198
1199   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1200     return;
1201
1202   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1203 }
1204
1205 /* Finds general ivs in basic block BB.  */
1206
1207 static void
1208 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1209 {
1210   gimple_stmt_iterator bsi;
1211
1212   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1213     find_givs_in_stmt (data, gsi_stmt (bsi));
1214 }
1215
1216 /* Finds general ivs.  */
1217
1218 static void
1219 find_givs (struct ivopts_data *data)
1220 {
1221   struct loop *loop = data->current_loop;
1222   basic_block *body = get_loop_body_in_dom_order (loop);
1223   unsigned i;
1224
1225   for (i = 0; i < loop->num_nodes; i++)
1226     find_givs_in_bb (data, body[i]);
1227   free (body);
1228 }
1229
1230 /* For each ssa name defined in LOOP determines whether it is an induction
1231    variable and if so, its initial value and step.  */
1232
1233 static bool
1234 find_induction_variables (struct ivopts_data *data)
1235 {
1236   unsigned i;
1237   bitmap_iterator bi;
1238
1239   if (!find_bivs (data))
1240     return false;
1241
1242   find_givs (data);
1243   mark_bivs (data);
1244
1245   if (dump_file && (dump_flags & TDF_DETAILS))
1246     {
1247       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1248
1249       if (niter)
1250         {
1251           fprintf (dump_file, "  number of iterations ");
1252           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1253           if (!integer_zerop (niter->may_be_zero))
1254             {
1255               fprintf (dump_file, "; zero if ");
1256               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1257             }
1258           fprintf (dump_file, "\n\n");
1259         };
1260
1261       fprintf (dump_file, "Induction variables:\n\n");
1262
1263       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1264         {
1265           if (ver_info (data, i)->iv)
1266             dump_iv (dump_file, ver_info (data, i)->iv);
1267         }
1268     }
1269
1270   return true;
1271 }
1272
1273 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1274
1275 static struct iv_use *
1276 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1277             gimple stmt, enum use_type use_type)
1278 {
1279   struct iv_use *use = XCNEW (struct iv_use);
1280
1281   use->id = n_iv_uses (data);
1282   use->type = use_type;
1283   use->iv = iv;
1284   use->stmt = stmt;
1285   use->op_p = use_p;
1286   use->related_cands = BITMAP_ALLOC (NULL);
1287
1288   /* To avoid showing ssa name in the dumps, if it was not reset by the
1289      caller.  */
1290   iv->ssa_name = NULL_TREE;
1291
1292   if (dump_file && (dump_flags & TDF_DETAILS))
1293     dump_use (dump_file, use);
1294
1295   data->iv_uses.safe_push (use);
1296
1297   return use;
1298 }
1299
1300 /* Checks whether OP is a loop-level invariant and if so, records it.
1301    NONLINEAR_USE is true if the invariant is used in a way we do not
1302    handle specially.  */
1303
1304 static void
1305 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1306 {
1307   basic_block bb;
1308   struct version_info *info;
1309
1310   if (TREE_CODE (op) != SSA_NAME
1311       || virtual_operand_p (op))
1312     return;
1313
1314   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1315   if (bb
1316       && flow_bb_inside_loop_p (data->current_loop, bb))
1317     return;
1318
1319   info = name_info (data, op);
1320   info->name = op;
1321   info->has_nonlin_use |= nonlinear_use;
1322   if (!info->inv_id)
1323     info->inv_id = ++data->max_inv_id;
1324   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1325 }
1326
1327 /* Checks whether the use OP is interesting and if so, records it.  */
1328
1329 static struct iv_use *
1330 find_interesting_uses_op (struct ivopts_data *data, tree op)
1331 {
1332   struct iv *iv;
1333   struct iv *civ;
1334   gimple stmt;
1335   struct iv_use *use;
1336
1337   if (TREE_CODE (op) != SSA_NAME)
1338     return NULL;
1339
1340   iv = get_iv (data, op);
1341   if (!iv)
1342     return NULL;
1343
1344   if (iv->have_use_for)
1345     {
1346       use = iv_use (data, iv->use_id);
1347
1348       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1349       return use;
1350     }
1351
1352   if (integer_zerop (iv->step))
1353     {
1354       record_invariant (data, op, true);
1355       return NULL;
1356     }
1357   iv->have_use_for = true;
1358
1359   civ = XNEW (struct iv);
1360   *civ = *iv;
1361
1362   stmt = SSA_NAME_DEF_STMT (op);
1363   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1364               || is_gimple_assign (stmt));
1365
1366   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1367   iv->use_id = use->id;
1368
1369   return use;
1370 }
1371
1372 /* Given a condition in statement STMT, checks whether it is a compare
1373    of an induction variable and an invariant.  If this is the case,
1374    CONTROL_VAR is set to location of the iv, BOUND to the location of
1375    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1376    induction variable descriptions, and true is returned.  If this is not
1377    the case, CONTROL_VAR and BOUND are set to the arguments of the
1378    condition and false is returned.  */
1379
1380 static bool
1381 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1382                        tree **control_var, tree **bound,
1383                        struct iv **iv_var, struct iv **iv_bound)
1384 {
1385   /* The objects returned when COND has constant operands.  */
1386   static struct iv const_iv;
1387   static tree zero;
1388   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1389   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1390   bool ret = false;
1391
1392   if (gimple_code (stmt) == GIMPLE_COND)
1393     {
1394       gcond *cond_stmt = as_a <gcond *> (stmt);
1395       op0 = gimple_cond_lhs_ptr (cond_stmt);
1396       op1 = gimple_cond_rhs_ptr (cond_stmt);
1397     }
1398   else
1399     {
1400       op0 = gimple_assign_rhs1_ptr (stmt);
1401       op1 = gimple_assign_rhs2_ptr (stmt);
1402     }
1403
1404   zero = integer_zero_node;
1405   const_iv.step = integer_zero_node;
1406
1407   if (TREE_CODE (*op0) == SSA_NAME)
1408     iv0 = get_iv (data, *op0);
1409   if (TREE_CODE (*op1) == SSA_NAME)
1410     iv1 = get_iv (data, *op1);
1411
1412   /* Exactly one of the compared values must be an iv, and the other one must
1413      be an invariant.  */
1414   if (!iv0 || !iv1)
1415     goto end;
1416
1417   if (integer_zerop (iv0->step))
1418     {
1419       /* Control variable may be on the other side.  */
1420       tmp_op = op0; op0 = op1; op1 = tmp_op;
1421       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1422     }
1423   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1424
1425 end:
1426   if (control_var)
1427     *control_var = op0;;
1428   if (iv_var)
1429     *iv_var = iv0;;
1430   if (bound)
1431     *bound = op1;
1432   if (iv_bound)
1433     *iv_bound = iv1;
1434
1435   return ret;
1436 }
1437
1438 /* Checks whether the condition in STMT is interesting and if so,
1439    records it.  */
1440
1441 static void
1442 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1443 {
1444   tree *var_p, *bound_p;
1445   struct iv *var_iv, *civ;
1446
1447   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1448     {
1449       find_interesting_uses_op (data, *var_p);
1450       find_interesting_uses_op (data, *bound_p);
1451       return;
1452     }
1453
1454   civ = XNEW (struct iv);
1455   *civ = *var_iv;
1456   record_use (data, NULL, civ, stmt, USE_COMPARE);
1457 }
1458
1459 /* Returns the outermost loop EXPR is obviously invariant in
1460    relative to the loop LOOP, i.e. if all its operands are defined
1461    outside of the returned loop.  Returns NULL if EXPR is not
1462    even obviously invariant in LOOP.  */
1463
1464 struct loop *
1465 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1466 {
1467   basic_block def_bb;
1468   unsigned i, len;
1469
1470   if (is_gimple_min_invariant (expr))
1471     return current_loops->tree_root;
1472
1473   if (TREE_CODE (expr) == SSA_NAME)
1474     {
1475       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1476       if (def_bb)
1477         {
1478           if (flow_bb_inside_loop_p (loop, def_bb))
1479             return NULL;
1480           return superloop_at_depth (loop,
1481                                      loop_depth (def_bb->loop_father) + 1);
1482         }
1483
1484       return current_loops->tree_root;
1485     }
1486
1487   if (!EXPR_P (expr))
1488     return NULL;
1489
1490   unsigned maxdepth = 0;
1491   len = TREE_OPERAND_LENGTH (expr);
1492   for (i = 0; i < len; i++)
1493     {
1494       struct loop *ivloop;
1495       if (!TREE_OPERAND (expr, i))
1496         continue;
1497
1498       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1499       if (!ivloop)
1500         return NULL;
1501       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1502     }
1503
1504   return superloop_at_depth (loop, maxdepth);
1505 }
1506
1507 /* Returns true if expression EXPR is obviously invariant in LOOP,
1508    i.e. if all its operands are defined outside of the LOOP.  LOOP
1509    should not be the function body.  */
1510
1511 bool
1512 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1513 {
1514   basic_block def_bb;
1515   unsigned i, len;
1516
1517   gcc_assert (loop_depth (loop) > 0);
1518
1519   if (is_gimple_min_invariant (expr))
1520     return true;
1521
1522   if (TREE_CODE (expr) == SSA_NAME)
1523     {
1524       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1525       if (def_bb
1526           && flow_bb_inside_loop_p (loop, def_bb))
1527         return false;
1528
1529       return true;
1530     }
1531
1532   if (!EXPR_P (expr))
1533     return false;
1534
1535   len = TREE_OPERAND_LENGTH (expr);
1536   for (i = 0; i < len; i++)
1537     if (TREE_OPERAND (expr, i)
1538         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1539       return false;
1540
1541   return true;
1542 }
1543
1544 /* Cumulates the steps of indices into DATA and replaces their values with the
1545    initial ones.  Returns false when the value of the index cannot be determined.
1546    Callback for for_each_index.  */
1547
1548 struct ifs_ivopts_data
1549 {
1550   struct ivopts_data *ivopts_data;
1551   gimple stmt;
1552   tree step;
1553 };
1554
1555 static bool
1556 idx_find_step (tree base, tree *idx, void *data)
1557 {
1558   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1559   struct iv *iv;
1560   tree step, iv_base, iv_step, lbound, off;
1561   struct loop *loop = dta->ivopts_data->current_loop;
1562
1563   /* If base is a component ref, require that the offset of the reference
1564      be invariant.  */
1565   if (TREE_CODE (base) == COMPONENT_REF)
1566     {
1567       off = component_ref_field_offset (base);
1568       return expr_invariant_in_loop_p (loop, off);
1569     }
1570
1571   /* If base is array, first check whether we will be able to move the
1572      reference out of the loop (in order to take its address in strength
1573      reduction).  In order for this to work we need both lower bound
1574      and step to be loop invariants.  */
1575   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1576     {
1577       /* Moreover, for a range, the size needs to be invariant as well.  */
1578       if (TREE_CODE (base) == ARRAY_RANGE_REF
1579           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1580         return false;
1581
1582       step = array_ref_element_size (base);
1583       lbound = array_ref_low_bound (base);
1584
1585       if (!expr_invariant_in_loop_p (loop, step)
1586           || !expr_invariant_in_loop_p (loop, lbound))
1587         return false;
1588     }
1589
1590   if (TREE_CODE (*idx) != SSA_NAME)
1591     return true;
1592
1593   iv = get_iv (dta->ivopts_data, *idx);
1594   if (!iv)
1595     return false;
1596
1597   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1598           *&x[0], which is not folded and does not trigger the
1599           ARRAY_REF path below.  */
1600   *idx = iv->base;
1601
1602   if (integer_zerop (iv->step))
1603     return true;
1604
1605   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1606     {
1607       step = array_ref_element_size (base);
1608
1609       /* We only handle addresses whose step is an integer constant.  */
1610       if (TREE_CODE (step) != INTEGER_CST)
1611         return false;
1612     }
1613   else
1614     /* The step for pointer arithmetics already is 1 byte.  */
1615     step = size_one_node;
1616
1617   iv_base = iv->base;
1618   iv_step = iv->step;
1619   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1620                             sizetype, &iv_base, &iv_step, dta->stmt,
1621                             false))
1622     {
1623       /* The index might wrap.  */
1624       return false;
1625     }
1626
1627   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1628   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1629
1630   return true;
1631 }
1632
1633 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1634    object is passed to it in DATA.  */
1635
1636 static bool
1637 idx_record_use (tree base, tree *idx,
1638                 void *vdata)
1639 {
1640   struct ivopts_data *data = (struct ivopts_data *) vdata;
1641   find_interesting_uses_op (data, *idx);
1642   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1643     {
1644       find_interesting_uses_op (data, array_ref_element_size (base));
1645       find_interesting_uses_op (data, array_ref_low_bound (base));
1646     }
1647   return true;
1648 }
1649
1650 /* If we can prove that TOP = cst * BOT for some constant cst,
1651    store cst to MUL and return true.  Otherwise return false.
1652    The returned value is always sign-extended, regardless of the
1653    signedness of TOP and BOT.  */
1654
1655 static bool
1656 constant_multiple_of (tree top, tree bot, widest_int *mul)
1657 {
1658   tree mby;
1659   enum tree_code code;
1660   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1661   widest_int res, p0, p1;
1662
1663   STRIP_NOPS (top);
1664   STRIP_NOPS (bot);
1665
1666   if (operand_equal_p (top, bot, 0))
1667     {
1668       *mul = 1;
1669       return true;
1670     }
1671
1672   code = TREE_CODE (top);
1673   switch (code)
1674     {
1675     case MULT_EXPR:
1676       mby = TREE_OPERAND (top, 1);
1677       if (TREE_CODE (mby) != INTEGER_CST)
1678         return false;
1679
1680       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1681         return false;
1682
1683       *mul = wi::sext (res * wi::to_widest (mby), precision);
1684       return true;
1685
1686     case PLUS_EXPR:
1687     case MINUS_EXPR:
1688       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1689           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1690         return false;
1691
1692       if (code == MINUS_EXPR)
1693         p1 = -p1;
1694       *mul = wi::sext (p0 + p1, precision);
1695       return true;
1696
1697     case INTEGER_CST:
1698       if (TREE_CODE (bot) != INTEGER_CST)
1699         return false;
1700
1701       p0 = widest_int::from (top, SIGNED);
1702       p1 = widest_int::from (bot, SIGNED);
1703       if (p1 == 0)
1704         return false;
1705       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1706       return res == 0;
1707
1708     default:
1709       return false;
1710     }
1711 }
1712
1713 /* Return true if memory reference REF with step STEP may be unaligned.  */
1714
1715 static bool
1716 may_be_unaligned_p (tree ref, tree step)
1717 {
1718   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1719      thus they are not misaligned.  */
1720   if (TREE_CODE (ref) == TARGET_MEM_REF)
1721     return false;
1722
1723   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1724   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1725     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1726
1727   unsigned HOST_WIDE_INT bitpos;
1728   unsigned int ref_align;
1729   get_object_alignment_1 (ref, &ref_align, &bitpos);
1730   if (ref_align < align
1731       || (bitpos % align) != 0
1732       || (bitpos % BITS_PER_UNIT) != 0)
1733     return true;
1734
1735   unsigned int trailing_zeros = tree_ctz (step);
1736   if (trailing_zeros < HOST_BITS_PER_INT
1737       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1738     return true;
1739
1740   return false;
1741 }
1742
1743 /* Return true if EXPR may be non-addressable.   */
1744
1745 bool
1746 may_be_nonaddressable_p (tree expr)
1747 {
1748   switch (TREE_CODE (expr))
1749     {
1750     case TARGET_MEM_REF:
1751       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1752          target, thus they are always addressable.  */
1753       return false;
1754
1755     case COMPONENT_REF:
1756       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1757              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1758
1759     case VIEW_CONVERT_EXPR:
1760       /* This kind of view-conversions may wrap non-addressable objects
1761          and make them look addressable.  After some processing the
1762          non-addressability may be uncovered again, causing ADDR_EXPRs
1763          of inappropriate objects to be built.  */
1764       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1765           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1766         return true;
1767
1768       /* ... fall through ... */
1769
1770     case ARRAY_REF:
1771     case ARRAY_RANGE_REF:
1772       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1773
1774     CASE_CONVERT:
1775       return true;
1776
1777     default:
1778       break;
1779     }
1780
1781   return false;
1782 }
1783
1784 /* Finds addresses in *OP_P inside STMT.  */
1785
1786 static void
1787 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1788 {
1789   tree base = *op_p, step = size_zero_node;
1790   struct iv *civ;
1791   struct ifs_ivopts_data ifs_ivopts_data;
1792
1793   /* Do not play with volatile memory references.  A bit too conservative,
1794      perhaps, but safe.  */
1795   if (gimple_has_volatile_ops (stmt))
1796     goto fail;
1797
1798   /* Ignore bitfields for now.  Not really something terribly complicated
1799      to handle.  TODO.  */
1800   if (TREE_CODE (base) == BIT_FIELD_REF)
1801     goto fail;
1802
1803   base = unshare_expr (base);
1804
1805   if (TREE_CODE (base) == TARGET_MEM_REF)
1806     {
1807       tree type = build_pointer_type (TREE_TYPE (base));
1808       tree astep;
1809
1810       if (TMR_BASE (base)
1811           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1812         {
1813           civ = get_iv (data, TMR_BASE (base));
1814           if (!civ)
1815             goto fail;
1816
1817           TMR_BASE (base) = civ->base;
1818           step = civ->step;
1819         }
1820       if (TMR_INDEX2 (base)
1821           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1822         {
1823           civ = get_iv (data, TMR_INDEX2 (base));
1824           if (!civ)
1825             goto fail;
1826
1827           TMR_INDEX2 (base) = civ->base;
1828           step = civ->step;
1829         }
1830       if (TMR_INDEX (base)
1831           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1832         {
1833           civ = get_iv (data, TMR_INDEX (base));
1834           if (!civ)
1835             goto fail;
1836
1837           TMR_INDEX (base) = civ->base;
1838           astep = civ->step;
1839
1840           if (astep)
1841             {
1842               if (TMR_STEP (base))
1843                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1844
1845               step = fold_build2 (PLUS_EXPR, type, step, astep);
1846             }
1847         }
1848
1849       if (integer_zerop (step))
1850         goto fail;
1851       base = tree_mem_ref_addr (type, base);
1852     }
1853   else
1854     {
1855       ifs_ivopts_data.ivopts_data = data;
1856       ifs_ivopts_data.stmt = stmt;
1857       ifs_ivopts_data.step = size_zero_node;
1858       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1859           || integer_zerop (ifs_ivopts_data.step))
1860         goto fail;
1861       step = ifs_ivopts_data.step;
1862
1863       /* Check that the base expression is addressable.  This needs
1864          to be done after substituting bases of IVs into it.  */
1865       if (may_be_nonaddressable_p (base))
1866         goto fail;
1867
1868       /* Moreover, on strict alignment platforms, check that it is
1869          sufficiently aligned.  */
1870       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1871         goto fail;
1872
1873       base = build_fold_addr_expr (base);
1874
1875       /* Substituting bases of IVs into the base expression might
1876          have caused folding opportunities.  */
1877       if (TREE_CODE (base) == ADDR_EXPR)
1878         {
1879           tree *ref = &TREE_OPERAND (base, 0);
1880           while (handled_component_p (*ref))
1881             ref = &TREE_OPERAND (*ref, 0);
1882           if (TREE_CODE (*ref) == MEM_REF)
1883             {
1884               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1885                                       TREE_OPERAND (*ref, 0),
1886                                       TREE_OPERAND (*ref, 1));
1887               if (tem)
1888                 *ref = tem;
1889             }
1890         }
1891     }
1892
1893   civ = alloc_iv (base, step);
1894   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1895   return;
1896
1897 fail:
1898   for_each_index (op_p, idx_record_use, data);
1899 }
1900
1901 /* Finds and records invariants used in STMT.  */
1902
1903 static void
1904 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1905 {
1906   ssa_op_iter iter;
1907   use_operand_p use_p;
1908   tree op;
1909
1910   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1911     {
1912       op = USE_FROM_PTR (use_p);
1913       record_invariant (data, op, false);
1914     }
1915 }
1916
1917 /* Finds interesting uses of induction variables in the statement STMT.  */
1918
1919 static void
1920 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1921 {
1922   struct iv *iv;
1923   tree op, *lhs, *rhs;
1924   ssa_op_iter iter;
1925   use_operand_p use_p;
1926   enum tree_code code;
1927
1928   find_invariants_stmt (data, stmt);
1929
1930   if (gimple_code (stmt) == GIMPLE_COND)
1931     {
1932       find_interesting_uses_cond (data, stmt);
1933       return;
1934     }
1935
1936   if (is_gimple_assign (stmt))
1937     {
1938       lhs = gimple_assign_lhs_ptr (stmt);
1939       rhs = gimple_assign_rhs1_ptr (stmt);
1940
1941       if (TREE_CODE (*lhs) == SSA_NAME)
1942         {
1943           /* If the statement defines an induction variable, the uses are not
1944              interesting by themselves.  */
1945
1946           iv = get_iv (data, *lhs);
1947
1948           if (iv && !integer_zerop (iv->step))
1949             return;
1950         }
1951
1952       code = gimple_assign_rhs_code (stmt);
1953       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1954           && (REFERENCE_CLASS_P (*rhs)
1955               || is_gimple_val (*rhs)))
1956         {
1957           if (REFERENCE_CLASS_P (*rhs))
1958             find_interesting_uses_address (data, stmt, rhs);
1959           else
1960             find_interesting_uses_op (data, *rhs);
1961
1962           if (REFERENCE_CLASS_P (*lhs))
1963             find_interesting_uses_address (data, stmt, lhs);
1964           return;
1965         }
1966       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1967         {
1968           find_interesting_uses_cond (data, stmt);
1969           return;
1970         }
1971
1972       /* TODO -- we should also handle address uses of type
1973
1974          memory = call (whatever);
1975
1976          and
1977
1978          call (memory).  */
1979     }
1980
1981   if (gimple_code (stmt) == GIMPLE_PHI
1982       && gimple_bb (stmt) == data->current_loop->header)
1983     {
1984       iv = get_iv (data, PHI_RESULT (stmt));
1985
1986       if (iv && !integer_zerop (iv->step))
1987         return;
1988     }
1989
1990   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1991     {
1992       op = USE_FROM_PTR (use_p);
1993
1994       if (TREE_CODE (op) != SSA_NAME)
1995         continue;
1996
1997       iv = get_iv (data, op);
1998       if (!iv)
1999         continue;
2000
2001       find_interesting_uses_op (data, op);
2002     }
2003 }
2004
2005 /* Finds interesting uses of induction variables outside of loops
2006    on loop exit edge EXIT.  */
2007
2008 static void
2009 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2010 {
2011   gphi *phi;
2012   gphi_iterator psi;
2013   tree def;
2014
2015   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2016     {
2017       phi = psi.phi ();
2018       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2019       if (!virtual_operand_p (def))
2020         find_interesting_uses_op (data, def);
2021     }
2022 }
2023
2024 /* Finds uses of the induction variables that are interesting.  */
2025
2026 static void
2027 find_interesting_uses (struct ivopts_data *data)
2028 {
2029   basic_block bb;
2030   gimple_stmt_iterator bsi;
2031   basic_block *body = get_loop_body (data->current_loop);
2032   unsigned i;
2033   struct version_info *info;
2034   edge e;
2035
2036   if (dump_file && (dump_flags & TDF_DETAILS))
2037     fprintf (dump_file, "Uses:\n\n");
2038
2039   for (i = 0; i < data->current_loop->num_nodes; i++)
2040     {
2041       edge_iterator ei;
2042       bb = body[i];
2043
2044       FOR_EACH_EDGE (e, ei, bb->succs)
2045         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2046             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2047           find_interesting_uses_outside (data, e);
2048
2049       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2050         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2051       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2052         if (!is_gimple_debug (gsi_stmt (bsi)))
2053           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2054     }
2055
2056   if (dump_file && (dump_flags & TDF_DETAILS))
2057     {
2058       bitmap_iterator bi;
2059
2060       fprintf (dump_file, "\n");
2061
2062       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2063         {
2064           info = ver_info (data, i);
2065           if (info->inv_id)
2066             {
2067               fprintf (dump_file, "  ");
2068               print_generic_expr (dump_file, info->name, TDF_SLIM);
2069               fprintf (dump_file, " is invariant (%d)%s\n",
2070                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2071             }
2072         }
2073
2074       fprintf (dump_file, "\n");
2075     }
2076
2077   free (body);
2078 }
2079
2080 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2081    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2082    we are at the top-level of the processed address.  */
2083
2084 static tree
2085 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2086                 HOST_WIDE_INT *offset)
2087 {
2088   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2089   enum tree_code code;
2090   tree type, orig_type = TREE_TYPE (expr);
2091   HOST_WIDE_INT off0, off1, st;
2092   tree orig_expr = expr;
2093
2094   STRIP_NOPS (expr);
2095
2096   type = TREE_TYPE (expr);
2097   code = TREE_CODE (expr);
2098   *offset = 0;
2099
2100   switch (code)
2101     {
2102     case INTEGER_CST:
2103       if (!cst_and_fits_in_hwi (expr)
2104           || integer_zerop (expr))
2105         return orig_expr;
2106
2107       *offset = int_cst_value (expr);
2108       return build_int_cst (orig_type, 0);
2109
2110     case POINTER_PLUS_EXPR:
2111     case PLUS_EXPR:
2112     case MINUS_EXPR:
2113       op0 = TREE_OPERAND (expr, 0);
2114       op1 = TREE_OPERAND (expr, 1);
2115
2116       op0 = strip_offset_1 (op0, false, false, &off0);
2117       op1 = strip_offset_1 (op1, false, false, &off1);
2118
2119       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2120       if (op0 == TREE_OPERAND (expr, 0)
2121           && op1 == TREE_OPERAND (expr, 1))
2122         return orig_expr;
2123
2124       if (integer_zerop (op1))
2125         expr = op0;
2126       else if (integer_zerop (op0))
2127         {
2128           if (code == MINUS_EXPR)
2129             expr = fold_build1 (NEGATE_EXPR, type, op1);
2130           else
2131             expr = op1;
2132         }
2133       else
2134         expr = fold_build2 (code, type, op0, op1);
2135
2136       return fold_convert (orig_type, expr);
2137
2138     case MULT_EXPR:
2139       op1 = TREE_OPERAND (expr, 1);
2140       if (!cst_and_fits_in_hwi (op1))
2141         return orig_expr;
2142
2143       op0 = TREE_OPERAND (expr, 0);
2144       op0 = strip_offset_1 (op0, false, false, &off0);
2145       if (op0 == TREE_OPERAND (expr, 0))
2146         return orig_expr;
2147
2148       *offset = off0 * int_cst_value (op1);
2149       if (integer_zerop (op0))
2150         expr = op0;
2151       else
2152         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2153
2154       return fold_convert (orig_type, expr);
2155
2156     case ARRAY_REF:
2157     case ARRAY_RANGE_REF:
2158       if (!inside_addr)
2159         return orig_expr;
2160
2161       step = array_ref_element_size (expr);
2162       if (!cst_and_fits_in_hwi (step))
2163         break;
2164
2165       st = int_cst_value (step);
2166       op1 = TREE_OPERAND (expr, 1);
2167       op1 = strip_offset_1 (op1, false, false, &off1);
2168       *offset = off1 * st;
2169
2170       if (top_compref
2171           && integer_zerop (op1))
2172         {
2173           /* Strip the component reference completely.  */
2174           op0 = TREE_OPERAND (expr, 0);
2175           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2176           *offset += off0;
2177           return op0;
2178         }
2179       break;
2180
2181     case COMPONENT_REF:
2182       {
2183         tree field;
2184
2185         if (!inside_addr)
2186           return orig_expr;
2187
2188         tmp = component_ref_field_offset (expr);
2189         field = TREE_OPERAND (expr, 1);
2190         if (top_compref
2191             && cst_and_fits_in_hwi (tmp)
2192             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2193           {
2194             HOST_WIDE_INT boffset, abs_off;
2195
2196             /* Strip the component reference completely.  */
2197             op0 = TREE_OPERAND (expr, 0);
2198             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2199             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2200             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2201             if (boffset < 0)
2202               abs_off = -abs_off;
2203
2204             *offset = off0 + int_cst_value (tmp) + abs_off;
2205             return op0;
2206           }
2207       }
2208       break;
2209
2210     case ADDR_EXPR:
2211       op0 = TREE_OPERAND (expr, 0);
2212       op0 = strip_offset_1 (op0, true, true, &off0);
2213       *offset += off0;
2214
2215       if (op0 == TREE_OPERAND (expr, 0))
2216         return orig_expr;
2217
2218       expr = build_fold_addr_expr (op0);
2219       return fold_convert (orig_type, expr);
2220
2221     case MEM_REF:
2222       /* ???  Offset operand?  */
2223       inside_addr = false;
2224       break;
2225
2226     default:
2227       return orig_expr;
2228     }
2229
2230   /* Default handling of expressions for that we want to recurse into
2231      the first operand.  */
2232   op0 = TREE_OPERAND (expr, 0);
2233   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2234   *offset += off0;
2235
2236   if (op0 == TREE_OPERAND (expr, 0)
2237       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2238     return orig_expr;
2239
2240   expr = copy_node (expr);
2241   TREE_OPERAND (expr, 0) = op0;
2242   if (op1)
2243     TREE_OPERAND (expr, 1) = op1;
2244
2245   /* Inside address, we might strip the top level component references,
2246      thus changing type of the expression.  Handling of ADDR_EXPR
2247      will fix that.  */
2248   expr = fold_convert (orig_type, expr);
2249
2250   return expr;
2251 }
2252
2253 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2254
2255 static tree
2256 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2257 {
2258   HOST_WIDE_INT off;
2259   tree core = strip_offset_1 (expr, false, false, &off);
2260   *offset = off;
2261   return core;
2262 }
2263
2264 /* Returns variant of TYPE that can be used as base for different uses.
2265    We return unsigned type with the same precision, which avoids problems
2266    with overflows.  */
2267
2268 static tree
2269 generic_type_for (tree type)
2270 {
2271   if (POINTER_TYPE_P (type))
2272     return unsigned_type_for (type);
2273
2274   if (TYPE_UNSIGNED (type))
2275     return type;
2276
2277   return unsigned_type_for (type);
2278 }
2279
2280 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2281    the bitmap to that we should store it.  */
2282
2283 static struct ivopts_data *fd_ivopts_data;
2284 static tree
2285 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2286 {
2287   bitmap *depends_on = (bitmap *) data;
2288   struct version_info *info;
2289
2290   if (TREE_CODE (*expr_p) != SSA_NAME)
2291     return NULL_TREE;
2292   info = name_info (fd_ivopts_data, *expr_p);
2293
2294   if (!info->inv_id || info->has_nonlin_use)
2295     return NULL_TREE;
2296
2297   if (!*depends_on)
2298     *depends_on = BITMAP_ALLOC (NULL);
2299   bitmap_set_bit (*depends_on, info->inv_id);
2300
2301   return NULL_TREE;
2302 }
2303
2304 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2305    position to POS.  If USE is not NULL, the candidate is set as related to
2306    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2307    replacement of the final value of the iv by a direct computation.  */
2308
2309 static struct iv_cand *
2310 add_candidate_1 (struct ivopts_data *data,
2311                  tree base, tree step, bool important, enum iv_position pos,
2312                  struct iv_use *use, gimple incremented_at)
2313 {
2314   unsigned i;
2315   struct iv_cand *cand = NULL;
2316   tree type, orig_type;
2317
2318   /* For non-original variables, make sure their values are computed in a type
2319      that does not invoke undefined behavior on overflows (since in general,
2320      we cannot prove that these induction variables are non-wrapping).  */
2321   if (pos != IP_ORIGINAL)
2322     {
2323       orig_type = TREE_TYPE (base);
2324       type = generic_type_for (orig_type);
2325       if (type != orig_type)
2326         {
2327           base = fold_convert (type, base);
2328           step = fold_convert (type, step);
2329         }
2330     }
2331
2332   for (i = 0; i < n_iv_cands (data); i++)
2333     {
2334       cand = iv_cand (data, i);
2335
2336       if (cand->pos != pos)
2337         continue;
2338
2339       if (cand->incremented_at != incremented_at
2340           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2341               && cand->ainc_use != use))
2342         continue;
2343
2344       if (!cand->iv)
2345         {
2346           if (!base && !step)
2347             break;
2348
2349           continue;
2350         }
2351
2352       if (!base && !step)
2353         continue;
2354
2355       if (operand_equal_p (base, cand->iv->base, 0)
2356           && operand_equal_p (step, cand->iv->step, 0)
2357           && (TYPE_PRECISION (TREE_TYPE (base))
2358               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2359         break;
2360     }
2361
2362   if (i == n_iv_cands (data))
2363     {
2364       cand = XCNEW (struct iv_cand);
2365       cand->id = i;
2366
2367       if (!base && !step)
2368         cand->iv = NULL;
2369       else
2370         cand->iv = alloc_iv (base, step);
2371
2372       cand->pos = pos;
2373       if (pos != IP_ORIGINAL && cand->iv)
2374         {
2375           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2376           cand->var_after = cand->var_before;
2377         }
2378       cand->important = important;
2379       cand->incremented_at = incremented_at;
2380       data->iv_candidates.safe_push (cand);
2381
2382       if (step
2383           && TREE_CODE (step) != INTEGER_CST)
2384         {
2385           fd_ivopts_data = data;
2386           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2387         }
2388
2389       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2390         cand->ainc_use = use;
2391       else
2392         cand->ainc_use = NULL;
2393
2394       if (dump_file && (dump_flags & TDF_DETAILS))
2395         dump_cand (dump_file, cand);
2396     }
2397
2398   if (important && !cand->important)
2399     {
2400       cand->important = true;
2401       if (dump_file && (dump_flags & TDF_DETAILS))
2402         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2403     }
2404
2405   if (use)
2406     {
2407       bitmap_set_bit (use->related_cands, i);
2408       if (dump_file && (dump_flags & TDF_DETAILS))
2409         fprintf (dump_file, "Candidate %d is related to use %d\n",
2410                  cand->id, use->id);
2411     }
2412
2413   return cand;
2414 }
2415
2416 /* Returns true if incrementing the induction variable at the end of the LOOP
2417    is allowed.
2418
2419    The purpose is to avoid splitting latch edge with a biv increment, thus
2420    creating a jump, possibly confusing other optimization passes and leaving
2421    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2422    is not available (so we do not have a better alternative), or if the latch
2423    edge is already nonempty.  */
2424
2425 static bool
2426 allow_ip_end_pos_p (struct loop *loop)
2427 {
2428   if (!ip_normal_pos (loop))
2429     return true;
2430
2431   if (!empty_block_p (ip_end_pos (loop)))
2432     return true;
2433
2434   return false;
2435 }
2436
2437 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2438    Important field is set to IMPORTANT.  */
2439
2440 static void
2441 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2442                         bool important, struct iv_use *use)
2443 {
2444   basic_block use_bb = gimple_bb (use->stmt);
2445   machine_mode mem_mode;
2446   unsigned HOST_WIDE_INT cstepi;
2447
2448   /* If we insert the increment in any position other than the standard
2449      ones, we must ensure that it is incremented once per iteration.
2450      It must not be in an inner nested loop, or one side of an if
2451      statement.  */
2452   if (use_bb->loop_father != data->current_loop
2453       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2454       || stmt_could_throw_p (use->stmt)
2455       || !cst_and_fits_in_hwi (step))
2456     return;
2457
2458   cstepi = int_cst_value (step);
2459
2460   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2461   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2462         || USE_STORE_PRE_INCREMENT (mem_mode))
2463        && GET_MODE_SIZE (mem_mode) == cstepi)
2464       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2465            || USE_STORE_PRE_DECREMENT (mem_mode))
2466           && GET_MODE_SIZE (mem_mode) == -cstepi))
2467     {
2468       enum tree_code code = MINUS_EXPR;
2469       tree new_base;
2470       tree new_step = step;
2471
2472       if (POINTER_TYPE_P (TREE_TYPE (base)))
2473         {
2474           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2475           code = POINTER_PLUS_EXPR;
2476         }
2477       else
2478         new_step = fold_convert (TREE_TYPE (base), new_step);
2479       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2480       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2481                        use->stmt);
2482     }
2483   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2484         || USE_STORE_POST_INCREMENT (mem_mode))
2485        && GET_MODE_SIZE (mem_mode) == cstepi)
2486       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2487            || USE_STORE_POST_DECREMENT (mem_mode))
2488           && GET_MODE_SIZE (mem_mode) == -cstepi))
2489     {
2490       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2491                        use->stmt);
2492     }
2493 }
2494
2495 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2496    position to POS.  If USE is not NULL, the candidate is set as related to
2497    it.  The candidate computation is scheduled on all available positions.  */
2498
2499 static void
2500 add_candidate (struct ivopts_data *data,
2501                tree base, tree step, bool important, struct iv_use *use)
2502 {
2503   if (ip_normal_pos (data->current_loop))
2504     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2505   if (ip_end_pos (data->current_loop)
2506       && allow_ip_end_pos_p (data->current_loop))
2507     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2508
2509   if (use != NULL && use->type == USE_ADDRESS)
2510     add_autoinc_candidates (data, base, step, important, use);
2511 }
2512
2513 /* Adds standard iv candidates.  */
2514
2515 static void
2516 add_standard_iv_candidates (struct ivopts_data *data)
2517 {
2518   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2519
2520   /* The same for a double-integer type if it is still fast enough.  */
2521   if (TYPE_PRECISION
2522         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2523       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2524     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2525                    build_int_cst (long_integer_type_node, 1), true, NULL);
2526
2527   /* The same for a double-integer type if it is still fast enough.  */
2528   if (TYPE_PRECISION
2529         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2530       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2531     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2532                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2533 }
2534
2535
2536 /* Adds candidates bases on the old induction variable IV.  */
2537
2538 static void
2539 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2540 {
2541   gimple phi;
2542   tree def;
2543   struct iv_cand *cand;
2544
2545   add_candidate (data, iv->base, iv->step, true, NULL);
2546
2547   /* The same, but with initial value zero.  */
2548   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2549     add_candidate (data, size_int (0), iv->step, true, NULL);
2550   else
2551     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2552                    iv->step, true, NULL);
2553
2554   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2555   if (gimple_code (phi) == GIMPLE_PHI)
2556     {
2557       /* Additionally record the possibility of leaving the original iv
2558          untouched.  */
2559       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2560       /* Don't add candidate if it's from another PHI node because
2561          it's an affine iv appearing in the form of PEELED_CHREC.  */
2562       phi = SSA_NAME_DEF_STMT (def);
2563       if (gimple_code (phi) != GIMPLE_PHI)
2564         {
2565           cand = add_candidate_1 (data,
2566                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2567                                   SSA_NAME_DEF_STMT (def));
2568           cand->var_before = iv->ssa_name;
2569           cand->var_after = def;
2570         }
2571       else
2572         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2573     }
2574 }
2575
2576 /* Adds candidates based on the old induction variables.  */
2577
2578 static void
2579 add_old_ivs_candidates (struct ivopts_data *data)
2580 {
2581   unsigned i;
2582   struct iv *iv;
2583   bitmap_iterator bi;
2584
2585   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2586     {
2587       iv = ver_info (data, i)->iv;
2588       if (iv && iv->biv_p && !integer_zerop (iv->step))
2589         add_old_iv_candidates (data, iv);
2590     }
2591 }
2592
2593 /* Adds candidates based on the value of the induction variable IV and USE.  */
2594
2595 static void
2596 add_iv_value_candidates (struct ivopts_data *data,
2597                          struct iv *iv, struct iv_use *use)
2598 {
2599   unsigned HOST_WIDE_INT offset;
2600   tree base;
2601   tree basetype;
2602
2603   add_candidate (data, iv->base, iv->step, false, use);
2604
2605   /* The same, but with initial value zero.  Make such variable important,
2606      since it is generic enough so that possibly many uses may be based
2607      on it.  */
2608   basetype = TREE_TYPE (iv->base);
2609   if (POINTER_TYPE_P (basetype))
2610     basetype = sizetype;
2611   add_candidate (data, build_int_cst (basetype, 0),
2612                  iv->step, true, use);
2613
2614   /* Third, try removing the constant offset.  Make sure to even
2615      add a candidate for &a[0] vs. (T *)&a.  */
2616   base = strip_offset (iv->base, &offset);
2617   if (offset
2618       || base != iv->base)
2619     add_candidate (data, base, iv->step, false, use);
2620 }
2621
2622 /* Adds candidates based on the uses.  */
2623
2624 static void
2625 add_derived_ivs_candidates (struct ivopts_data *data)
2626 {
2627   unsigned i;
2628
2629   for (i = 0; i < n_iv_uses (data); i++)
2630     {
2631       struct iv_use *use = iv_use (data, i);
2632
2633       if (!use)
2634         continue;
2635
2636       switch (use->type)
2637         {
2638         case USE_NONLINEAR_EXPR:
2639         case USE_COMPARE:
2640         case USE_ADDRESS:
2641           /* Just add the ivs based on the value of the iv used here.  */
2642           add_iv_value_candidates (data, use->iv, use);
2643           break;
2644
2645         default:
2646           gcc_unreachable ();
2647         }
2648     }
2649 }
2650
2651 /* Record important candidates and add them to related_cands bitmaps
2652    if needed.  */
2653
2654 static void
2655 record_important_candidates (struct ivopts_data *data)
2656 {
2657   unsigned i;
2658   struct iv_use *use;
2659
2660   for (i = 0; i < n_iv_cands (data); i++)
2661     {
2662       struct iv_cand *cand = iv_cand (data, i);
2663
2664       if (cand->important)
2665         bitmap_set_bit (data->important_candidates, i);
2666     }
2667
2668   data->consider_all_candidates = (n_iv_cands (data)
2669                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2670
2671   if (data->consider_all_candidates)
2672     {
2673       /* We will not need "related_cands" bitmaps in this case,
2674          so release them to decrease peak memory consumption.  */
2675       for (i = 0; i < n_iv_uses (data); i++)
2676         {
2677           use = iv_use (data, i);
2678           BITMAP_FREE (use->related_cands);
2679         }
2680     }
2681   else
2682     {
2683       /* Add important candidates to the related_cands bitmaps.  */
2684       for (i = 0; i < n_iv_uses (data); i++)
2685         bitmap_ior_into (iv_use (data, i)->related_cands,
2686                          data->important_candidates);
2687     }
2688 }
2689
2690 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2691    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2692    we allocate a simple list to every use.  */
2693
2694 static void
2695 alloc_use_cost_map (struct ivopts_data *data)
2696 {
2697   unsigned i, size, s;
2698
2699   for (i = 0; i < n_iv_uses (data); i++)
2700     {
2701       struct iv_use *use = iv_use (data, i);
2702
2703       if (data->consider_all_candidates)
2704         size = n_iv_cands (data);
2705       else
2706         {
2707           s = bitmap_count_bits (use->related_cands);
2708
2709           /* Round up to the power of two, so that moduling by it is fast.  */
2710           size = s ? (1 << ceil_log2 (s)) : 1;
2711         }
2712
2713       use->n_map_members = size;
2714       use->cost_map = XCNEWVEC (struct cost_pair, size);
2715     }
2716 }
2717
2718 /* Returns description of computation cost of expression whose runtime
2719    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2720
2721 static comp_cost
2722 new_cost (unsigned runtime, unsigned complexity)
2723 {
2724   comp_cost cost;
2725
2726   cost.cost = runtime;
2727   cost.complexity = complexity;
2728
2729   return cost;
2730 }
2731
2732 /* Adds costs COST1 and COST2.  */
2733
2734 static comp_cost
2735 add_costs (comp_cost cost1, comp_cost cost2)
2736 {
2737   cost1.cost += cost2.cost;
2738   cost1.complexity += cost2.complexity;
2739
2740   return cost1;
2741 }
2742 /* Subtracts costs COST1 and COST2.  */
2743
2744 static comp_cost
2745 sub_costs (comp_cost cost1, comp_cost cost2)
2746 {
2747   cost1.cost -= cost2.cost;
2748   cost1.complexity -= cost2.complexity;
2749
2750   return cost1;
2751 }
2752
2753 /* Returns a negative number if COST1 < COST2, a positive number if
2754    COST1 > COST2, and 0 if COST1 = COST2.  */
2755
2756 static int
2757 compare_costs (comp_cost cost1, comp_cost cost2)
2758 {
2759   if (cost1.cost == cost2.cost)
2760     return cost1.complexity - cost2.complexity;
2761
2762   return cost1.cost - cost2.cost;
2763 }
2764
2765 /* Returns true if COST is infinite.  */
2766
2767 static bool
2768 infinite_cost_p (comp_cost cost)
2769 {
2770   return cost.cost == INFTY;
2771 }
2772
2773 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2774    on invariants DEPENDS_ON and that the value used in expressing it
2775    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2776
2777 static void
2778 set_use_iv_cost (struct ivopts_data *data,
2779                  struct iv_use *use, struct iv_cand *cand,
2780                  comp_cost cost, bitmap depends_on, tree value,
2781                  enum tree_code comp, int inv_expr_id)
2782 {
2783   unsigned i, s;
2784
2785   if (infinite_cost_p (cost))
2786     {
2787       BITMAP_FREE (depends_on);
2788       return;
2789     }
2790
2791   if (data->consider_all_candidates)
2792     {
2793       use->cost_map[cand->id].cand = cand;
2794       use->cost_map[cand->id].cost = cost;
2795       use->cost_map[cand->id].depends_on = depends_on;
2796       use->cost_map[cand->id].value = value;
2797       use->cost_map[cand->id].comp = comp;
2798       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2799       return;
2800     }
2801
2802   /* n_map_members is a power of two, so this computes modulo.  */
2803   s = cand->id & (use->n_map_members - 1);
2804   for (i = s; i < use->n_map_members; i++)
2805     if (!use->cost_map[i].cand)
2806       goto found;
2807   for (i = 0; i < s; i++)
2808     if (!use->cost_map[i].cand)
2809       goto found;
2810
2811   gcc_unreachable ();
2812
2813 found:
2814   use->cost_map[i].cand = cand;
2815   use->cost_map[i].cost = cost;
2816   use->cost_map[i].depends_on = depends_on;
2817   use->cost_map[i].value = value;
2818   use->cost_map[i].comp = comp;
2819   use->cost_map[i].inv_expr_id = inv_expr_id;
2820 }
2821
2822 /* Gets cost of (USE, CANDIDATE) pair.  */
2823
2824 static struct cost_pair *
2825 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2826                  struct iv_cand *cand)
2827 {
2828   unsigned i, s;
2829   struct cost_pair *ret;
2830
2831   if (!cand)
2832     return NULL;
2833
2834   if (data->consider_all_candidates)
2835     {
2836       ret = use->cost_map + cand->id;
2837       if (!ret->cand)
2838         return NULL;
2839
2840       return ret;
2841     }
2842
2843   /* n_map_members is a power of two, so this computes modulo.  */
2844   s = cand->id & (use->n_map_members - 1);
2845   for (i = s; i < use->n_map_members; i++)
2846     if (use->cost_map[i].cand == cand)
2847       return use->cost_map + i;
2848     else if (use->cost_map[i].cand == NULL)
2849       return NULL;
2850   for (i = 0; i < s; i++)
2851     if (use->cost_map[i].cand == cand)
2852       return use->cost_map + i;
2853     else if (use->cost_map[i].cand == NULL)
2854       return NULL;
2855
2856   return NULL;
2857 }
2858
2859 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2860 static rtx
2861 produce_memory_decl_rtl (tree obj, int *regno)
2862 {
2863   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2864   machine_mode address_mode = targetm.addr_space.address_mode (as);
2865   rtx x;
2866
2867   gcc_assert (obj);
2868   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2869     {
2870       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2871       x = gen_rtx_SYMBOL_REF (address_mode, name);
2872       SET_SYMBOL_REF_DECL (x, obj);
2873       x = gen_rtx_MEM (DECL_MODE (obj), x);
2874       set_mem_addr_space (x, as);
2875       targetm.encode_section_info (obj, x, true);
2876     }
2877   else
2878     {
2879       x = gen_raw_REG (address_mode, (*regno)++);
2880       x = gen_rtx_MEM (DECL_MODE (obj), x);
2881       set_mem_addr_space (x, as);
2882     }
2883
2884   return x;
2885 }
2886
2887 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2888    walk_tree.  DATA contains the actual fake register number.  */
2889
2890 static tree
2891 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2892 {
2893   tree obj = NULL_TREE;
2894   rtx x = NULL_RTX;
2895   int *regno = (int *) data;
2896
2897   switch (TREE_CODE (*expr_p))
2898     {
2899     case ADDR_EXPR:
2900       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2901            handled_component_p (*expr_p);
2902            expr_p = &TREE_OPERAND (*expr_p, 0))
2903         continue;
2904       obj = *expr_p;
2905       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2906         x = produce_memory_decl_rtl (obj, regno);
2907       break;
2908
2909     case SSA_NAME:
2910       *ws = 0;
2911       obj = SSA_NAME_VAR (*expr_p);
2912       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2913       if (!obj)
2914         return NULL_TREE;
2915       if (!DECL_RTL_SET_P (obj))
2916         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2917       break;
2918
2919     case VAR_DECL:
2920     case PARM_DECL:
2921     case RESULT_DECL:
2922       *ws = 0;
2923       obj = *expr_p;
2924
2925       if (DECL_RTL_SET_P (obj))
2926         break;
2927
2928       if (DECL_MODE (obj) == BLKmode)
2929         x = produce_memory_decl_rtl (obj, regno);
2930       else
2931         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2932
2933       break;
2934
2935     default:
2936       break;
2937     }
2938
2939   if (x)
2940     {
2941       decl_rtl_to_reset.safe_push (obj);
2942       SET_DECL_RTL (obj, x);
2943     }
2944
2945   return NULL_TREE;
2946 }
2947
2948 /* Determines cost of the computation of EXPR.  */
2949
2950 static unsigned
2951 computation_cost (tree expr, bool speed)
2952 {
2953   rtx_insn *seq;
2954   rtx rslt;
2955   tree type = TREE_TYPE (expr);
2956   unsigned cost;
2957   /* Avoid using hard regs in ways which may be unsupported.  */
2958   int regno = LAST_VIRTUAL_REGISTER + 1;
2959   struct cgraph_node *node = cgraph_node::get (current_function_decl);
2960   enum node_frequency real_frequency = node->frequency;
2961
2962   node->frequency = NODE_FREQUENCY_NORMAL;
2963   crtl->maybe_hot_insn_p = speed;
2964   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2965   start_sequence ();
2966   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2967   seq = get_insns ();
2968   end_sequence ();
2969   default_rtl_profile ();
2970   node->frequency = real_frequency;
2971
2972   cost = seq_cost (seq, speed);
2973   if (MEM_P (rslt))
2974     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2975                           TYPE_ADDR_SPACE (type), speed);
2976   else if (!REG_P (rslt))
2977     cost += set_src_cost (rslt, speed);
2978
2979   return cost;
2980 }
2981
2982 /* Returns variable containing the value of candidate CAND at statement AT.  */
2983
2984 static tree
2985 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2986 {
2987   if (stmt_after_increment (loop, cand, stmt))
2988     return cand->var_after;
2989   else
2990     return cand->var_before;
2991 }
2992
2993 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2994    same precision that is at least as wide as the precision of TYPE, stores
2995    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2996    type of A and B.  */
2997
2998 static tree
2999 determine_common_wider_type (tree *a, tree *b)
3000 {
3001   tree wider_type = NULL;
3002   tree suba, subb;
3003   tree atype = TREE_TYPE (*a);
3004
3005   if (CONVERT_EXPR_P (*a))
3006     {
3007       suba = TREE_OPERAND (*a, 0);
3008       wider_type = TREE_TYPE (suba);
3009       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3010         return atype;
3011     }
3012   else
3013     return atype;
3014
3015   if (CONVERT_EXPR_P (*b))
3016     {
3017       subb = TREE_OPERAND (*b, 0);
3018       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3019         return atype;
3020     }
3021   else
3022     return atype;
3023
3024   *a = suba;
3025   *b = subb;
3026   return wider_type;
3027 }
3028
3029 /* Determines the expression by that USE is expressed from induction variable
3030    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3031    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3032
3033 static bool
3034 get_computation_aff (struct loop *loop,
3035                      struct iv_use *use, struct iv_cand *cand, gimple at,
3036                      struct aff_tree *aff)
3037 {
3038   tree ubase = use->iv->base;
3039   tree ustep = use->iv->step;
3040   tree cbase = cand->iv->base;
3041   tree cstep = cand->iv->step, cstep_common;
3042   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3043   tree common_type, var;
3044   tree uutype;
3045   aff_tree cbase_aff, var_aff;
3046   widest_int rat;
3047
3048   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3049     {
3050       /* We do not have a precision to express the values of use.  */
3051       return false;
3052     }
3053
3054   var = var_at_stmt (loop, cand, at);
3055   uutype = unsigned_type_for (utype);
3056
3057   /* If the conversion is not noop, perform it.  */
3058   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3059     {
3060       cstep = fold_convert (uutype, cstep);
3061       cbase = fold_convert (uutype, cbase);
3062       var = fold_convert (uutype, var);
3063     }
3064
3065   if (!constant_multiple_of (ustep, cstep, &rat))
3066     return false;
3067
3068   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3069      type, we achieve better folding by computing their difference in this
3070      wider type, and cast the result to UUTYPE.  We do not need to worry about
3071      overflows, as all the arithmetics will in the end be performed in UUTYPE
3072      anyway.  */
3073   common_type = determine_common_wider_type (&ubase, &cbase);
3074
3075   /* use = ubase - ratio * cbase + ratio * var.  */
3076   tree_to_aff_combination (ubase, common_type, aff);
3077   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3078   tree_to_aff_combination (var, uutype, &var_aff);
3079
3080   /* We need to shift the value if we are after the increment.  */
3081   if (stmt_after_increment (loop, cand, at))
3082     {
3083       aff_tree cstep_aff;
3084
3085       if (common_type != uutype)
3086         cstep_common = fold_convert (common_type, cstep);
3087       else
3088         cstep_common = cstep;
3089
3090       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3091       aff_combination_add (&cbase_aff, &cstep_aff);
3092     }
3093
3094   aff_combination_scale (&cbase_aff, -rat);
3095   aff_combination_add (aff, &cbase_aff);
3096   if (common_type != uutype)
3097     aff_combination_convert (aff, uutype);
3098
3099   aff_combination_scale (&var_aff, rat);
3100   aff_combination_add (aff, &var_aff);
3101
3102   return true;
3103 }
3104
3105 /* Return the type of USE.  */
3106
3107 static tree
3108 get_use_type (struct iv_use *use)
3109 {
3110   tree base_type = TREE_TYPE (use->iv->base);
3111   tree type;
3112
3113   if (use->type == USE_ADDRESS)
3114     {
3115       /* The base_type may be a void pointer.  Create a pointer type based on
3116          the mem_ref instead.  */
3117       type = build_pointer_type (TREE_TYPE (*use->op_p));
3118       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3119                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3120     }
3121   else
3122     type = base_type;
3123
3124   return type;
3125 }
3126
3127 /* Determines the expression by that USE is expressed from induction variable
3128    CAND at statement AT in LOOP.  The computation is unshared.  */
3129
3130 static tree
3131 get_computation_at (struct loop *loop,
3132                     struct iv_use *use, struct iv_cand *cand, gimple at)
3133 {
3134   aff_tree aff;
3135   tree type = get_use_type (use);
3136
3137   if (!get_computation_aff (loop, use, cand, at, &aff))
3138     return NULL_TREE;
3139   unshare_aff_combination (&aff);
3140   return fold_convert (type, aff_combination_to_tree (&aff));
3141 }
3142
3143 /* Determines the expression by that USE is expressed from induction variable
3144    CAND in LOOP.  The computation is unshared.  */
3145
3146 static tree
3147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3148 {
3149   return get_computation_at (loop, use, cand, use->stmt);
3150 }
3151
3152 /* Adjust the cost COST for being in loop setup rather than loop body.
3153    If we're optimizing for space, the loop setup overhead is constant;
3154    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3155 static unsigned
3156 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3157 {
3158   if (cost == INFTY)
3159     return cost;
3160   else if (optimize_loop_for_speed_p (data->current_loop))
3161     return cost / avg_loop_niter (data->current_loop);
3162   else
3163     return cost;
3164 }
3165
3166 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3167    validity for a memory reference accessing memory of mode MODE in
3168    address space AS.  */
3169
3170
3171 bool
3172 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3173                                  addr_space_t as)
3174 {
3175 #define MAX_RATIO 128
3176   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3177   static vec<sbitmap> valid_mult_list;
3178   sbitmap valid_mult;
3179
3180   if (data_index >= valid_mult_list.length ())
3181     valid_mult_list.safe_grow_cleared (data_index + 1);
3182
3183   valid_mult = valid_mult_list[data_index];
3184   if (!valid_mult)
3185     {
3186       machine_mode address_mode = targetm.addr_space.address_mode (as);
3187       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3188       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3189       rtx addr, scaled;
3190       HOST_WIDE_INT i;
3191
3192       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3193       bitmap_clear (valid_mult);
3194       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3195       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3196       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3197         {
3198           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3199           if (memory_address_addr_space_p (mode, addr, as)
3200               || memory_address_addr_space_p (mode, scaled, as))
3201             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3202         }
3203
3204       if (dump_file && (dump_flags & TDF_DETAILS))
3205         {
3206           fprintf (dump_file, "  allowed multipliers:");
3207           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3208             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3209               fprintf (dump_file, " %d", (int) i);
3210           fprintf (dump_file, "\n");
3211           fprintf (dump_file, "\n");
3212         }
3213
3214       valid_mult_list[data_index] = valid_mult;
3215     }
3216
3217   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3218     return false;
3219
3220   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3221 }
3222
3223 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3224    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3225    variable is omitted.  Compute the cost for a memory reference that accesses
3226    a memory location of mode MEM_MODE in address space AS.
3227
3228    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229    size of MEM_MODE / RATIO) is available.  To make this determination, we
3230    look at the size of the increment to be made, which is given in CSTEP.
3231    CSTEP may be zero if the step is unknown.
3232    STMT_AFTER_INC is true iff the statement we're looking at is after the
3233    increment of the original biv.
3234
3235    TODO -- there must be some better way.  This all is quite crude.  */
3236
3237 enum ainc_type
3238 {
3239   AINC_PRE_INC,         /* Pre increment.  */
3240   AINC_PRE_DEC,         /* Pre decrement.  */
3241   AINC_POST_INC,        /* Post increment.  */
3242   AINC_POST_DEC,        /* Post decrement.  */
3243   AINC_NONE             /* Also the number of auto increment types.  */
3244 };
3245
3246 typedef struct address_cost_data_s
3247 {
3248   HOST_WIDE_INT min_offset, max_offset;
3249   unsigned costs[2][2][2][2];
3250   unsigned ainc_costs[AINC_NONE];
3251 } *address_cost_data;
3252
3253
3254 static comp_cost
3255 get_address_cost (bool symbol_present, bool var_present,
3256                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3257                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3258                   addr_space_t as, bool speed,
3259                   bool stmt_after_inc, bool *may_autoinc)
3260 {
3261   machine_mode address_mode = targetm.addr_space.address_mode (as);
3262   static vec<address_cost_data> address_cost_data_list;
3263   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3264   address_cost_data data;
3265   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3266   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3267   unsigned cost, acost, complexity;
3268   enum ainc_type autoinc_type;
3269   bool offset_p, ratio_p, autoinc;
3270   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3271   unsigned HOST_WIDE_INT mask;
3272   unsigned bits;
3273
3274   if (data_index >= address_cost_data_list.length ())
3275     address_cost_data_list.safe_grow_cleared (data_index + 1);
3276
3277   data = address_cost_data_list[data_index];
3278   if (!data)
3279     {
3280       HOST_WIDE_INT i;
3281       HOST_WIDE_INT rat, off = 0;
3282       int old_cse_not_expected, width;
3283       unsigned sym_p, var_p, off_p, rat_p, add_c;
3284       rtx_insn *seq;
3285       rtx addr, base;
3286       rtx reg0, reg1;
3287
3288       data = (address_cost_data) xcalloc (1, sizeof (*data));
3289
3290       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3291
3292       width = GET_MODE_BITSIZE (address_mode) - 1;
3293       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3294         width = HOST_BITS_PER_WIDE_INT - 1;
3295       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3296
3297       for (i = width; i >= 0; i--)
3298         {
3299           off = -((unsigned HOST_WIDE_INT) 1 << i);
3300           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301           if (memory_address_addr_space_p (mem_mode, addr, as))
3302             break;
3303         }
3304       data->min_offset = (i == -1? 0 : off);
3305
3306       for (i = width; i >= 0; i--)
3307         {
3308           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3309           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3310           if (memory_address_addr_space_p (mem_mode, addr, as))
3311             break;
3312           /* For some TARGET, like ARM THUMB1, the offset should be nature
3313              aligned.  Try an aligned offset if address_mode is not QImode.  */
3314           off = (address_mode == QImode)
3315                 ? 0
3316                 : ((unsigned HOST_WIDE_INT) 1 << i)
3317                     - GET_MODE_SIZE (address_mode);
3318           if (off > 0)
3319             {
3320               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3321               if (memory_address_addr_space_p (mem_mode, addr, as))
3322                 break;
3323             }
3324         }
3325       if (i == -1)
3326         off = 0;
3327       data->max_offset = off;
3328
3329       if (dump_file && (dump_flags & TDF_DETAILS))
3330         {
3331           fprintf (dump_file, "get_address_cost:\n");
3332           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3333                    GET_MODE_NAME (mem_mode),
3334                    data->min_offset);
3335           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3336                    GET_MODE_NAME (mem_mode),
3337                    data->max_offset);
3338         }
3339
3340       rat = 1;
3341       for (i = 2; i <= MAX_RATIO; i++)
3342         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3343           {
3344             rat = i;
3345             break;
3346           }
3347
3348       /* Compute the cost of various addressing modes.  */
3349       acost = 0;
3350       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3352
3353       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3354           || USE_STORE_PRE_DECREMENT (mem_mode))
3355         {
3356           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3357           has_predec[mem_mode]
3358             = memory_address_addr_space_p (mem_mode, addr, as);
3359
3360           if (has_predec[mem_mode])
3361             data->ainc_costs[AINC_PRE_DEC]
3362               = address_cost (addr, mem_mode, as, speed);
3363         }
3364       if (USE_LOAD_POST_DECREMENT (mem_mode)
3365           || USE_STORE_POST_DECREMENT (mem_mode))
3366         {
3367           addr = gen_rtx_POST_DEC (address_mode, reg0);
3368           has_postdec[mem_mode]
3369             = memory_address_addr_space_p (mem_mode, addr, as);
3370
3371           if (has_postdec[mem_mode])
3372             data->ainc_costs[AINC_POST_DEC]
3373               = address_cost (addr, mem_mode, as, speed);
3374         }
3375       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3376           || USE_STORE_PRE_DECREMENT (mem_mode))
3377         {
3378           addr = gen_rtx_PRE_INC (address_mode, reg0);
3379           has_preinc[mem_mode]
3380             = memory_address_addr_space_p (mem_mode, addr, as);
3381
3382           if (has_preinc[mem_mode])
3383             data->ainc_costs[AINC_PRE_INC]
3384               = address_cost (addr, mem_mode, as, speed);
3385         }
3386       if (USE_LOAD_POST_INCREMENT (mem_mode)
3387           || USE_STORE_POST_INCREMENT (mem_mode))
3388         {
3389           addr = gen_rtx_POST_INC (address_mode, reg0);
3390           has_postinc[mem_mode]
3391             = memory_address_addr_space_p (mem_mode, addr, as);
3392
3393           if (has_postinc[mem_mode])
3394             data->ainc_costs[AINC_POST_INC]
3395               = address_cost (addr, mem_mode, as, speed);
3396         }
3397       for (i = 0; i < 16; i++)
3398         {
3399           sym_p = i & 1;
3400           var_p = (i >> 1) & 1;
3401           off_p = (i >> 2) & 1;
3402           rat_p = (i >> 3) & 1;
3403
3404           addr = reg0;
3405           if (rat_p)
3406             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3407                                    gen_int_mode (rat, address_mode));
3408
3409           if (var_p)
3410             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3411
3412           if (sym_p)
3413             {
3414               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3415               /* ??? We can run into trouble with some backends by presenting
3416                  it with symbols which haven't been properly passed through
3417                  targetm.encode_section_info.  By setting the local bit, we
3418                  enhance the probability of things working.  */
3419               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3420
3421               if (off_p)
3422                 base = gen_rtx_fmt_e (CONST, address_mode,
3423                                       gen_rtx_fmt_ee
3424                                         (PLUS, address_mode, base,
3425                                          gen_int_mode (off, address_mode)));
3426             }
3427           else if (off_p)
3428             base = gen_int_mode (off, address_mode);
3429           else
3430             base = NULL_RTX;
3431
3432           if (base)
3433             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3434
3435           start_sequence ();
3436           /* To avoid splitting addressing modes, pretend that no cse will
3437              follow.  */
3438           old_cse_not_expected = cse_not_expected;
3439           cse_not_expected = true;
3440           addr = memory_address_addr_space (mem_mode, addr, as);
3441           cse_not_expected = old_cse_not_expected;
3442           seq = get_insns ();
3443           end_sequence ();
3444
3445           acost = seq_cost (seq, speed);
3446           acost += address_cost (addr, mem_mode, as, speed);
3447
3448           if (!acost)
3449             acost = 1;
3450           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3451         }
3452
3453       /* On some targets, it is quite expensive to load symbol to a register,
3454          which makes addresses that contain symbols look much more expensive.
3455          However, the symbol will have to be loaded in any case before the
3456          loop (and quite likely we have it in register already), so it does not
3457          make much sense to penalize them too heavily.  So make some final
3458          tweaks for the SYMBOL_PRESENT modes:
3459
3460          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3461          var is cheaper, use this mode with small penalty.
3462          If VAR_PRESENT is true, try whether the mode with
3463          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3464          if this is the case, use it.  */
3465       add_c = add_cost (speed, address_mode);
3466       for (i = 0; i < 8; i++)
3467         {
3468           var_p = i & 1;
3469           off_p = (i >> 1) & 1;
3470           rat_p = (i >> 2) & 1;
3471
3472           acost = data->costs[0][1][off_p][rat_p] + 1;
3473           if (var_p)
3474             acost += add_c;
3475
3476           if (acost < data->costs[1][var_p][off_p][rat_p])
3477             data->costs[1][var_p][off_p][rat_p] = acost;
3478         }
3479
3480       if (dump_file && (dump_flags & TDF_DETAILS))
3481         {
3482           fprintf (dump_file, "Address costs:\n");
3483
3484           for (i = 0; i < 16; i++)
3485             {
3486               sym_p = i & 1;
3487               var_p = (i >> 1) & 1;
3488               off_p = (i >> 2) & 1;
3489               rat_p = (i >> 3) & 1;
3490
3491               fprintf (dump_file, "  ");
3492               if (sym_p)
3493                 fprintf (dump_file, "sym + ");
3494               if (var_p)
3495                 fprintf (dump_file, "var + ");
3496               if (off_p)
3497                 fprintf (dump_file, "cst + ");
3498               if (rat_p)
3499                 fprintf (dump_file, "rat * ");
3500
3501               acost = data->costs[sym_p][var_p][off_p][rat_p];
3502               fprintf (dump_file, "index costs %d\n", acost);
3503             }
3504           if (has_predec[mem_mode] || has_postdec[mem_mode]
3505               || has_preinc[mem_mode] || has_postinc[mem_mode])
3506             fprintf (dump_file, "  May include autoinc/dec\n");
3507           fprintf (dump_file, "\n");
3508         }
3509
3510       address_cost_data_list[data_index] = data;
3511     }
3512
3513   bits = GET_MODE_BITSIZE (address_mode);
3514   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3515   offset &= mask;
3516   if ((offset >> (bits - 1) & 1))
3517     offset |= ~mask;
3518   s_offset = offset;
3519
3520   autoinc = false;
3521   autoinc_type = AINC_NONE;
3522   msize = GET_MODE_SIZE (mem_mode);
3523   autoinc_offset = offset;
3524   if (stmt_after_inc)
3525     autoinc_offset += ratio * cstep;
3526   if (symbol_present || var_present || ratio != 1)
3527     autoinc = false;
3528   else
3529     {
3530       if (has_postinc[mem_mode] && autoinc_offset == 0
3531           && msize == cstep)
3532         autoinc_type = AINC_POST_INC;
3533       else if (has_postdec[mem_mode] && autoinc_offset == 0
3534                && msize == -cstep)
3535         autoinc_type = AINC_POST_DEC;
3536       else if (has_preinc[mem_mode] && autoinc_offset == msize
3537                && msize == cstep)
3538         autoinc_type = AINC_PRE_INC;
3539       else if (has_predec[mem_mode] && autoinc_offset == -msize
3540                && msize == -cstep)
3541         autoinc_type = AINC_PRE_DEC;
3542
3543       if (autoinc_type != AINC_NONE)
3544         autoinc = true;
3545     }
3546
3547   cost = 0;
3548   offset_p = (s_offset != 0
3549               && data->min_offset <= s_offset
3550               && s_offset <= data->max_offset);
3551   ratio_p = (ratio != 1
3552              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3553
3554   if (ratio != 1 && !ratio_p)
3555     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3556
3557   if (s_offset && !offset_p && !symbol_present)
3558     cost += add_cost (speed, address_mode);
3559
3560   if (may_autoinc)
3561     *may_autoinc = autoinc;
3562   if (autoinc)
3563     acost = data->ainc_costs[autoinc_type];
3564   else
3565     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3566   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3567   return new_cost (cost + acost, complexity);
3568 }
3569
3570  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3571     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3572     calculating the operands of EXPR.  Returns true if successful, and returns
3573     the cost in COST.  */
3574
3575 static bool
3576 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3577                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3578 {
3579   comp_cost res;
3580   tree op1 = TREE_OPERAND (expr, 1);
3581   tree cst = TREE_OPERAND (mult, 1);
3582   tree multop = TREE_OPERAND (mult, 0);
3583   int m = exact_log2 (int_cst_value (cst));
3584   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3585   int sa_cost;
3586   bool equal_p = false;
3587
3588   if (!(m >= 0 && m < maxm))
3589     return false;
3590
3591   if (operand_equal_p (op1, mult, 0))
3592     equal_p = true;
3593
3594   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3595              ? shiftadd_cost (speed, mode, m)
3596              : (equal_p
3597                 ? shiftsub1_cost (speed, mode, m)
3598                 : shiftsub0_cost (speed, mode, m)));
3599   res = new_cost (sa_cost, 0);
3600   res = add_costs (res, equal_p ? cost0 : cost1);
3601
3602   STRIP_NOPS (multop);
3603   if (!is_gimple_val (multop))
3604     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3605
3606   *cost = res;
3607   return true;
3608 }
3609
3610 /* Estimates cost of forcing expression EXPR into a variable.  */
3611
3612 static comp_cost
3613 force_expr_to_var_cost (tree expr, bool speed)
3614 {
3615   static bool costs_initialized = false;
3616   static unsigned integer_cost [2];
3617   static unsigned symbol_cost [2];
3618   static unsigned address_cost [2];
3619   tree op0, op1;
3620   comp_cost cost0, cost1, cost;
3621   machine_mode mode;
3622
3623   if (!costs_initialized)
3624     {
3625       tree type = build_pointer_type (integer_type_node);
3626       tree var, addr;
3627       rtx x;
3628       int i;
3629
3630       var = create_tmp_var_raw (integer_type_node, "test_var");
3631       TREE_STATIC (var) = 1;
3632       x = produce_memory_decl_rtl (var, NULL);
3633       SET_DECL_RTL (var, x);
3634
3635       addr = build1 (ADDR_EXPR, type, var);
3636
3637
3638       for (i = 0; i < 2; i++)
3639         {
3640           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3641                                                              2000), i);
3642
3643           symbol_cost[i] = computation_cost (addr, i) + 1;
3644
3645           address_cost[i]
3646             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3647           if (dump_file && (dump_flags & TDF_DETAILS))
3648             {
3649               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3650               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3651               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3652               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3653               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3654               fprintf (dump_file, "\n");
3655             }
3656         }
3657
3658       costs_initialized = true;
3659     }
3660
3661   STRIP_NOPS (expr);
3662
3663   if (SSA_VAR_P (expr))
3664     return no_cost;
3665
3666   if (is_gimple_min_invariant (expr))
3667     {
3668       if (TREE_CODE (expr) == INTEGER_CST)
3669         return new_cost (integer_cost [speed], 0);
3670
3671       if (TREE_CODE (expr) == ADDR_EXPR)
3672         {
3673           tree obj = TREE_OPERAND (expr, 0);
3674
3675           if (TREE_CODE (obj) == VAR_DECL
3676               || TREE_CODE (obj) == PARM_DECL
3677               || TREE_CODE (obj) == RESULT_DECL)
3678             return new_cost (symbol_cost [speed], 0);
3679         }
3680
3681       return new_cost (address_cost [speed], 0);
3682     }
3683
3684   switch (TREE_CODE (expr))
3685     {
3686     case POINTER_PLUS_EXPR:
3687     case PLUS_EXPR:
3688     case MINUS_EXPR:
3689     case MULT_EXPR:
3690       op0 = TREE_OPERAND (expr, 0);
3691       op1 = TREE_OPERAND (expr, 1);
3692       STRIP_NOPS (op0);
3693       STRIP_NOPS (op1);
3694       break;
3695
3696     CASE_CONVERT:
3697     case NEGATE_EXPR:
3698       op0 = TREE_OPERAND (expr, 0);
3699       STRIP_NOPS (op0);
3700       op1 = NULL_TREE;
3701       break;
3702
3703     default:
3704       /* Just an arbitrary value, FIXME.  */
3705       return new_cost (target_spill_cost[speed], 0);
3706     }
3707
3708   if (op0 == NULL_TREE
3709       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3710     cost0 = no_cost;
3711   else
3712     cost0 = force_expr_to_var_cost (op0, speed);
3713
3714   if (op1 == NULL_TREE
3715       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3716     cost1 = no_cost;
3717   else
3718     cost1 = force_expr_to_var_cost (op1, speed);
3719
3720   mode = TYPE_MODE (TREE_TYPE (expr));
3721   switch (TREE_CODE (expr))
3722     {
3723     case POINTER_PLUS_EXPR:
3724     case PLUS_EXPR:
3725     case MINUS_EXPR:
3726     case NEGATE_EXPR:
3727       cost = new_cost (add_cost (speed, mode), 0);
3728       if (TREE_CODE (expr) != NEGATE_EXPR)
3729         {
3730           tree mult = NULL_TREE;
3731           comp_cost sa_cost;
3732           if (TREE_CODE (op1) == MULT_EXPR)
3733             mult = op1;
3734           else if (TREE_CODE (op0) == MULT_EXPR)
3735             mult = op0;
3736
3737           if (mult != NULL_TREE
3738               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3739               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3740                                     speed, &sa_cost))
3741             return sa_cost;
3742         }
3743       break;
3744
3745     CASE_CONVERT:
3746       {
3747         tree inner_mode, outer_mode;
3748         outer_mode = TREE_TYPE (expr);
3749         inner_mode = TREE_TYPE (op0);
3750         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3751                                        TYPE_MODE (inner_mode), speed), 0);
3752       }
3753       break;
3754
3755     case MULT_EXPR:
3756       if (cst_and_fits_in_hwi (op0))
3757         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3758                                              mode, speed), 0);
3759       else if (cst_and_fits_in_hwi (op1))
3760         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3761                                              mode, speed), 0);
3762       else
3763         return new_cost (target_spill_cost [speed], 0);
3764       break;
3765
3766     default:
3767       gcc_unreachable ();
3768     }
3769
3770   cost = add_costs (cost, cost0);
3771   cost = add_costs (cost, cost1);
3772
3773   /* Bound the cost by target_spill_cost.  The parts of complicated
3774      computations often are either loop invariant or at least can
3775      be shared between several iv uses, so letting this grow without
3776      limits would not give reasonable results.  */
3777   if (cost.cost > (int) target_spill_cost [speed])
3778     cost.cost = target_spill_cost [speed];
3779
3780   return cost;
3781 }
3782
3783 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3784    invariants the computation depends on.  */
3785
3786 static comp_cost
3787 force_var_cost (struct ivopts_data *data,
3788                 tree expr, bitmap *depends_on)
3789 {
3790   if (depends_on)
3791     {
3792       fd_ivopts_data = data;
3793       walk_tree (&expr, find_depends, depends_on, NULL);
3794     }
3795
3796   return force_expr_to_var_cost (expr, data->speed);
3797 }
3798
3799 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3800    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3801    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3802    invariants the computation depends on.  */
3803
3804 static comp_cost
3805 split_address_cost (struct ivopts_data *data,
3806                     tree addr, bool *symbol_present, bool *var_present,
3807                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3808 {
3809   tree core;
3810   HOST_WIDE_INT bitsize;
3811   HOST_WIDE_INT bitpos;
3812   tree toffset;
3813   machine_mode mode;
3814   int unsignedp, volatilep;
3815
3816   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3817                               &unsignedp, &volatilep, false);
3818
3819   if (toffset != 0
3820       || bitpos % BITS_PER_UNIT != 0
3821       || TREE_CODE (core) != VAR_DECL)
3822     {
3823       *symbol_present = false;
3824       *var_present = true;
3825       fd_ivopts_data = data;
3826       walk_tree (&addr, find_depends, depends_on, NULL);
3827       return new_cost (target_spill_cost[data->speed], 0);
3828     }
3829
3830   *offset += bitpos / BITS_PER_UNIT;
3831   if (TREE_STATIC (core)
3832       || DECL_EXTERNAL (core))
3833     {
3834       *symbol_present = true;
3835       *var_present = false;
3836       return no_cost;
3837     }
3838
3839   *symbol_present = false;
3840   *var_present = true;
3841   return no_cost;
3842 }
3843
3844 /* Estimates cost of expressing difference of addresses E1 - E2 as
3845    var + symbol + offset.  The value of offset is added to OFFSET,
3846    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3847    part is missing.  DEPENDS_ON is a set of the invariants the computation
3848    depends on.  */
3849
3850 static comp_cost
3851 ptr_difference_cost (struct ivopts_data *data,
3852                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3853                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3854 {
3855   HOST_WIDE_INT diff = 0;
3856   aff_tree aff_e1, aff_e2;
3857   tree type;
3858
3859   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3860
3861   if (ptr_difference_const (e1, e2, &diff))
3862     {
3863       *offset += diff;
3864       *symbol_present = false;
3865       *var_present = false;
3866       return no_cost;
3867     }
3868
3869   if (integer_zerop (e2))
3870     return split_address_cost (data, TREE_OPERAND (e1, 0),
3871                                symbol_present, var_present, offset, depends_on);
3872
3873   *symbol_present = false;
3874   *var_present = true;
3875
3876   type = signed_type_for (TREE_TYPE (e1));
3877   tree_to_aff_combination (e1, type, &aff_e1);
3878   tree_to_aff_combination (e2, type, &aff_e2);
3879   aff_combination_scale (&aff_e2, -1);
3880   aff_combination_add (&aff_e1, &aff_e2);
3881
3882   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3883 }
3884
3885 /* Estimates cost of expressing difference E1 - E2 as
3886    var + symbol + offset.  The value of offset is added to OFFSET,
3887    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3888    part is missing.  DEPENDS_ON is a set of the invariants the computation
3889    depends on.  */
3890
3891 static comp_cost
3892 difference_cost (struct ivopts_data *data,
3893                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3894                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3895 {
3896   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3897   unsigned HOST_WIDE_INT off1, off2;
3898   aff_tree aff_e1, aff_e2;
3899   tree type;
3900
3901   e1 = strip_offset (e1, &off1);
3902   e2 = strip_offset (e2, &off2);
3903   *offset += off1 - off2;
3904
3905   STRIP_NOPS (e1);
3906   STRIP_NOPS (e2);
3907
3908   if (TREE_CODE (e1) == ADDR_EXPR)
3909     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3910                                 offset, depends_on);
3911   *symbol_present = false;
3912
3913   if (operand_equal_p (e1, e2, 0))
3914     {
3915       *var_present = false;
3916       return no_cost;
3917     }
3918
3919   *var_present = true;
3920
3921   if (integer_zerop (e2))
3922     return force_var_cost (data, e1, depends_on);
3923
3924   if (integer_zerop (e1))
3925     {
3926       comp_cost cost = force_var_cost (data, e2, depends_on);
3927       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3928       return cost;
3929     }
3930
3931   type = signed_type_for (TREE_TYPE (e1));
3932   tree_to_aff_combination (e1, type, &aff_e1);
3933   tree_to_aff_combination (e2, type, &aff_e2);
3934   aff_combination_scale (&aff_e2, -1);
3935   aff_combination_add (&aff_e1, &aff_e2);
3936
3937   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3938 }
3939
3940 /* Returns true if AFF1 and AFF2 are identical.  */
3941
3942 static bool
3943 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3944 {
3945   unsigned i;
3946
3947   if (aff1->n != aff2->n)
3948     return false;
3949
3950   for (i = 0; i < aff1->n; i++)
3951     {
3952       if (aff1->elts[i].coef != aff2->elts[i].coef)
3953         return false;
3954
3955       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3956         return false;
3957     }
3958   return true;
3959 }
3960
3961 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3962
3963 static int
3964 get_expr_id (struct ivopts_data *data, tree expr)
3965 {
3966   struct iv_inv_expr_ent ent;
3967   struct iv_inv_expr_ent **slot;
3968
3969   ent.expr = expr;
3970   ent.hash = iterative_hash_expr (expr, 0);
3971   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3972   if (*slot)
3973     return (*slot)->id;
3974
3975   *slot = XNEW (struct iv_inv_expr_ent);
3976   (*slot)->expr = expr;
3977   (*slot)->hash = ent.hash;
3978   (*slot)->id = data->inv_expr_id++;
3979   return (*slot)->id;
3980 }
3981
3982 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3983    requires a new compiler generated temporary.  Returns -1 otherwise.
3984    ADDRESS_P is a flag indicating if the expression is for address
3985    computation.  */
3986
3987 static int
3988 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3989                             tree cbase, HOST_WIDE_INT ratio,
3990                             bool address_p)
3991 {
3992   aff_tree ubase_aff, cbase_aff;
3993   tree expr, ub, cb;
3994
3995   STRIP_NOPS (ubase);
3996   STRIP_NOPS (cbase);
3997   ub = ubase;
3998   cb = cbase;
3999
4000   if ((TREE_CODE (ubase) == INTEGER_CST)
4001       && (TREE_CODE (cbase) == INTEGER_CST))
4002     return -1;
4003
4004   /* Strips the constant part. */
4005   if (TREE_CODE (ubase) == PLUS_EXPR
4006       || TREE_CODE (ubase) == MINUS_EXPR
4007       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4008     {
4009       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4010         ubase = TREE_OPERAND (ubase, 0);
4011     }
4012
4013   /* Strips the constant part. */
4014   if (TREE_CODE (cbase) == PLUS_EXPR
4015       || TREE_CODE (cbase) == MINUS_EXPR
4016       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4017     {
4018       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4019         cbase = TREE_OPERAND (cbase, 0);
4020     }
4021
4022   if (address_p)
4023     {
4024       if (((TREE_CODE (ubase) == SSA_NAME)
4025            || (TREE_CODE (ubase) == ADDR_EXPR
4026                && is_gimple_min_invariant (ubase)))
4027           && (TREE_CODE (cbase) == INTEGER_CST))
4028         return -1;
4029
4030       if (((TREE_CODE (cbase) == SSA_NAME)
4031            || (TREE_CODE (cbase) == ADDR_EXPR
4032                && is_gimple_min_invariant (cbase)))
4033           && (TREE_CODE (ubase) == INTEGER_CST))
4034         return -1;
4035     }
4036
4037   if (ratio == 1)
4038     {
4039       if (operand_equal_p (ubase, cbase, 0))
4040         return -1;
4041
4042       if (TREE_CODE (ubase) == ADDR_EXPR
4043           && TREE_CODE (cbase) == ADDR_EXPR)
4044         {
4045           tree usym, csym;
4046
4047           usym = TREE_OPERAND (ubase, 0);
4048           csym = TREE_OPERAND (cbase, 0);
4049           if (TREE_CODE (usym) == ARRAY_REF)
4050             {
4051               tree ind = TREE_OPERAND (usym, 1);
4052               if (TREE_CODE (ind) == INTEGER_CST
4053                   && tree_fits_shwi_p (ind)
4054                   && tree_to_shwi (ind) == 0)
4055                 usym = TREE_OPERAND (usym, 0);
4056             }
4057           if (TREE_CODE (csym) == ARRAY_REF)
4058             {
4059               tree ind = TREE_OPERAND (csym, 1);
4060               if (TREE_CODE (ind) == INTEGER_CST
4061                   && tree_fits_shwi_p (ind)
4062                   && tree_to_shwi (ind) == 0)
4063                 csym = TREE_OPERAND (csym, 0);
4064             }
4065           if (operand_equal_p (usym, csym, 0))
4066             return -1;
4067         }
4068       /* Now do more complex comparison  */
4069       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4070       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4071       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4072         return -1;
4073     }
4074
4075   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4076   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4077
4078   aff_combination_scale (&cbase_aff, -1 * ratio);
4079   aff_combination_add (&ubase_aff, &cbase_aff);
4080   expr = aff_combination_to_tree (&ubase_aff);
4081   return get_expr_id (data, expr);
4082 }
4083
4084
4085
4086 /* Determines the cost of the computation by that USE is expressed
4087    from induction variable CAND.  If ADDRESS_P is true, we just need
4088    to create an address from it, otherwise we want to get it into
4089    register.  A set of invariants we depend on is stored in
4090    DEPENDS_ON.  AT is the statement at that the value is computed.
4091    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4092    addressing is likely.  */
4093
4094 static comp_cost
4095 get_computation_cost_at (struct ivopts_data *data,
4096                          struct iv_use *use, struct iv_cand *cand,
4097                          bool address_p, bitmap *depends_on, gimple at,
4098                          bool *can_autoinc,
4099                          int *inv_expr_id)
4100 {
4101   tree ubase = use->iv->base, ustep = use->iv->step;
4102   tree cbase, cstep;
4103   tree utype = TREE_TYPE (ubase), ctype;
4104   unsigned HOST_WIDE_INT cstepi, offset = 0;
4105   HOST_WIDE_INT ratio, aratio;
4106   bool var_present, symbol_present, stmt_is_after_inc;
4107   comp_cost cost;
4108   widest_int rat;
4109   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4110   machine_mode mem_mode = (address_p
4111                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4112                                 : VOIDmode);
4113
4114   *depends_on = NULL;
4115
4116   /* Only consider real candidates.  */
4117   if (!cand->iv)
4118     return infinite_cost;
4119
4120   cbase = cand->iv->base;
4121   cstep = cand->iv->step;
4122   ctype = TREE_TYPE (cbase);
4123
4124   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4125     {
4126       /* We do not have a precision to express the values of use.  */
4127       return infinite_cost;
4128     }
4129
4130   if (address_p
4131       || (use->iv->base_object
4132           && cand->iv->base_object
4133           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4134           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4135     {
4136       /* Do not try to express address of an object with computation based
4137          on address of a different object.  This may cause problems in rtl
4138          level alias analysis (that does not expect this to be happening,
4139          as this is illegal in C), and would be unlikely to be useful
4140          anyway.  */
4141       if (use->iv->base_object
4142           && cand->iv->base_object
4143           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4144         return infinite_cost;
4145     }
4146
4147   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4148     {
4149       /* TODO -- add direct handling of this case.  */
4150       goto fallback;
4151     }
4152
4153   /* CSTEPI is removed from the offset in case statement is after the
4154      increment.  If the step is not constant, we use zero instead.
4155      This is a bit imprecise (there is the extra addition), but
4156      redundancy elimination is likely to transform the code so that
4157      it uses value of the variable before increment anyway,
4158      so it is not that much unrealistic.  */
4159   if (cst_and_fits_in_hwi (cstep))
4160     cstepi = int_cst_value (cstep);
4161   else
4162     cstepi = 0;
4163
4164   if (!constant_multiple_of (ustep, cstep, &rat))
4165     return infinite_cost;
4166
4167   if (wi::fits_shwi_p (rat))
4168     ratio = rat.to_shwi ();
4169   else
4170     return infinite_cost;
4171
4172   STRIP_NOPS (cbase);
4173   ctype = TREE_TYPE (cbase);
4174
4175   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4176
4177   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4178      or ratio == 1, it is better to handle this like
4179
4180      ubase - ratio * cbase + ratio * var
4181
4182      (also holds in the case ratio == -1, TODO.  */
4183
4184   if (cst_and_fits_in_hwi (cbase))
4185     {
4186       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4187       cost = difference_cost (data,
4188                               ubase, build_int_cst (utype, 0),
4189                               &symbol_present, &var_present, &offset,
4190                               depends_on);
4191       cost.cost /= avg_loop_niter (data->current_loop);
4192     }
4193   else if (ratio == 1)
4194     {
4195       tree real_cbase = cbase;
4196
4197       /* Check to see if any adjustment is needed.  */
4198       if (cstepi == 0 && stmt_is_after_inc)
4199         {
4200           aff_tree real_cbase_aff;
4201           aff_tree cstep_aff;
4202
4203           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4204                                    &real_cbase_aff);
4205           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4206
4207           aff_combination_add (&real_cbase_aff, &cstep_aff);
4208           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4209         }
4210
4211       cost = difference_cost (data,
4212                               ubase, real_cbase,
4213                               &symbol_present, &var_present, &offset,
4214                               depends_on);
4215       cost.cost /= avg_loop_niter (data->current_loop);
4216     }
4217   else if (address_p
4218            && !POINTER_TYPE_P (ctype)
4219            && multiplier_allowed_in_address_p
4220                 (ratio, mem_mode,
4221                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4222     {
4223       cbase
4224         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4225       cost = difference_cost (data,
4226                               ubase, cbase,
4227                               &symbol_present, &var_present, &offset,
4228                               depends_on);
4229       cost.cost /= avg_loop_niter (data->current_loop);
4230     }
4231   else
4232     {
4233       cost = force_var_cost (data, cbase, depends_on);
4234       cost = add_costs (cost,
4235                         difference_cost (data,
4236                                          ubase, build_int_cst (utype, 0),
4237                                          &symbol_present, &var_present,
4238                                          &offset, depends_on));
4239       cost.cost /= avg_loop_niter (data->current_loop);
4240       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4241     }
4242
4243   if (inv_expr_id)
4244     {
4245       *inv_expr_id =
4246           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4247       /* Clear depends on.  */
4248       if (*inv_expr_id != -1 && depends_on && *depends_on)
4249         bitmap_clear (*depends_on);
4250     }
4251
4252   /* If we are after the increment, the value of the candidate is higher by
4253      one iteration.  */
4254   if (stmt_is_after_inc)
4255     offset -= ratio * cstepi;
4256
4257   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4258      (symbol/var1/const parts may be omitted).  If we are looking for an
4259      address, find the cost of addressing this.  */
4260   if (address_p)
4261     return add_costs (cost,
4262                       get_address_cost (symbol_present, var_present,
4263                                         offset, ratio, cstepi,
4264                                         mem_mode,
4265                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4266                                         speed, stmt_is_after_inc,
4267                                         can_autoinc));
4268
4269   /* Otherwise estimate the costs for computing the expression.  */
4270   if (!symbol_present && !var_present && !offset)
4271     {
4272       if (ratio != 1)
4273         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4274       return cost;
4275     }
4276
4277   /* Symbol + offset should be compile-time computable so consider that they
4278       are added once to the variable, if present.  */
4279   if (var_present && (symbol_present || offset))
4280     cost.cost += adjust_setup_cost (data,
4281                                     add_cost (speed, TYPE_MODE (ctype)));
4282
4283   /* Having offset does not affect runtime cost in case it is added to
4284      symbol, but it increases complexity.  */
4285   if (offset)
4286     cost.complexity++;
4287
4288   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4289
4290   aratio = ratio > 0 ? ratio : -ratio;
4291   if (aratio != 1)
4292     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4293   return cost;
4294
4295 fallback:
4296   if (can_autoinc)
4297     *can_autoinc = false;
4298
4299   {
4300     /* Just get the expression, expand it and measure the cost.  */
4301     tree comp = get_computation_at (data->current_loop, use, cand, at);
4302
4303     if (!comp)
4304       return infinite_cost;
4305
4306     if (address_p)
4307       comp = build_simple_mem_ref (comp);
4308
4309     return new_cost (computation_cost (comp, speed), 0);
4310   }
4311 }
4312
4313 /* Determines the cost of the computation by that USE is expressed
4314    from induction variable CAND.  If ADDRESS_P is true, we just need
4315    to create an address from it, otherwise we want to get it into
4316    register.  A set of invariants we depend on is stored in
4317    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4318    autoinc addressing is likely.  */
4319
4320 static comp_cost
4321 get_computation_cost (struct ivopts_data *data,
4322                       struct iv_use *use, struct iv_cand *cand,
4323                       bool address_p, bitmap *depends_on,
4324                       bool *can_autoinc, int *inv_expr_id)
4325 {
4326   return get_computation_cost_at (data,
4327                                   use, cand, address_p, depends_on, use->stmt,
4328                                   can_autoinc, inv_expr_id);
4329 }
4330
4331 /* Determines cost of basing replacement of USE on CAND in a generic
4332    expression.  */
4333
4334 static bool
4335 determine_use_iv_cost_generic (struct ivopts_data *data,
4336                                struct iv_use *use, struct iv_cand *cand)
4337 {
4338   bitmap depends_on;
4339   comp_cost cost;
4340   int inv_expr_id = -1;
4341
4342   /* The simple case first -- if we need to express value of the preserved
4343      original biv, the cost is 0.  This also prevents us from counting the
4344      cost of increment twice -- once at this use and once in the cost of
4345      the candidate.  */
4346   if (cand->pos == IP_ORIGINAL
4347       && cand->incremented_at == use->stmt)
4348     {
4349       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4350                        ERROR_MARK, -1);
4351       return true;
4352     }
4353
4354   cost = get_computation_cost (data, use, cand, false, &depends_on,
4355                                NULL, &inv_expr_id);
4356
4357   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4358                    inv_expr_id);
4359
4360   return !infinite_cost_p (cost);
4361 }
4362
4363 /* Determines cost of basing replacement of USE on CAND in an address.  */
4364
4365 static bool
4366 determine_use_iv_cost_address (struct ivopts_data *data,
4367                                struct iv_use *use, struct iv_cand *cand)
4368 {
4369   bitmap depends_on;
4370   bool can_autoinc;
4371   int inv_expr_id = -1;
4372   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4373                                          &can_autoinc, &inv_expr_id);
4374
4375   if (cand->ainc_use == use)
4376     {
4377       if (can_autoinc)
4378         cost.cost -= cand->cost_step;
4379       /* If we generated the candidate solely for exploiting autoincrement
4380          opportunities, and it turns out it can't be used, set the cost to
4381          infinity to make sure we ignore it.  */
4382       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4383         cost = infinite_cost;
4384     }
4385   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4386                    inv_expr_id);
4387
4388   return !infinite_cost_p (cost);
4389 }
4390
4391 /* Computes value of candidate CAND at position AT in iteration NITER, and
4392    stores it to VAL.  */
4393
4394 static void
4395 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4396                aff_tree *val)
4397 {
4398   aff_tree step, delta, nit;
4399   struct iv *iv = cand->iv;
4400   tree type = TREE_TYPE (iv->base);
4401   tree steptype = type;
4402   if (POINTER_TYPE_P (type))
4403     steptype = sizetype;
4404   steptype = unsigned_type_for (type);
4405
4406   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4407   aff_combination_convert (&step, steptype);
4408   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4409   aff_combination_convert (&nit, steptype);
4410   aff_combination_mult (&nit, &step, &delta);
4411   if (stmt_after_increment (loop, cand, at))
4412     aff_combination_add (&delta, &step);
4413
4414   tree_to_aff_combination (iv->base, type, val);
4415   if (!POINTER_TYPE_P (type))
4416     aff_combination_convert (val, steptype);
4417   aff_combination_add (val, &delta);
4418 }
4419
4420 /* Returns period of induction variable iv.  */
4421
4422 static tree
4423 iv_period (struct iv *iv)
4424 {
4425   tree step = iv->step, period, type;
4426   tree pow2div;
4427
4428   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4429
4430   type = unsigned_type_for (TREE_TYPE (step));
4431   /* Period of the iv is lcm (step, type_range)/step -1,
4432      i.e., N*type_range/step - 1. Since type range is power
4433      of two, N == (step >> num_of_ending_zeros_binary (step),
4434      so the final result is
4435
4436        (type_range >> num_of_ending_zeros_binary (step)) - 1
4437
4438   */
4439   pow2div = num_ending_zeros (step);
4440
4441   period = build_low_bits_mask (type,
4442                                 (TYPE_PRECISION (type)
4443                                  - tree_to_uhwi (pow2div)));
4444
4445   return period;
4446 }
4447
4448 /* Returns the comparison operator used when eliminating the iv USE.  */
4449
4450 static enum tree_code
4451 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4452 {
4453   struct loop *loop = data->current_loop;
4454   basic_block ex_bb;
4455   edge exit;
4456
4457   ex_bb = gimple_bb (use->stmt);
4458   exit = EDGE_SUCC (ex_bb, 0);
4459   if (flow_bb_inside_loop_p (loop, exit->dest))
4460     exit = EDGE_SUCC (ex_bb, 1);
4461
4462   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4463 }
4464
4465 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4466    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4467    calculation is performed in non-wrapping type.
4468
4469    TODO: More generally, we could test for the situation that
4470          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4471          This would require knowing the sign of OFFSET.  */
4472
4473 static bool
4474 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4475 {
4476   enum tree_code code;
4477   tree e1, e2;
4478   aff_tree aff_e1, aff_e2, aff_offset;
4479
4480   if (!nowrap_type_p (TREE_TYPE (base)))
4481     return false;
4482
4483   base = expand_simple_operations (base);
4484
4485   if (TREE_CODE (base) == SSA_NAME)
4486     {
4487       gimple stmt = SSA_NAME_DEF_STMT (base);
4488
4489       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4490         return false;
4491
4492       code = gimple_assign_rhs_code (stmt);
4493       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4494         return false;
4495
4496       e1 = gimple_assign_rhs1 (stmt);
4497       e2 = gimple_assign_rhs2 (stmt);
4498     }
4499   else
4500     {
4501       code = TREE_CODE (base);
4502       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4503         return false;
4504       e1 = TREE_OPERAND (base, 0);
4505       e2 = TREE_OPERAND (base, 1);
4506     }
4507
4508   /* Use affine expansion as deeper inspection to prove the equality.  */
4509   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4510                                   &aff_e2, &data->name_expansion_cache);
4511   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4512                                   &aff_offset, &data->name_expansion_cache);
4513   aff_combination_scale (&aff_offset, -1);
4514   switch (code)
4515     {
4516     case PLUS_EXPR:
4517       aff_combination_add (&aff_e2, &aff_offset);
4518       if (aff_combination_zero_p (&aff_e2))
4519         return true;
4520
4521       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4522                                       &aff_e1, &data->name_expansion_cache);
4523       aff_combination_add (&aff_e1, &aff_offset);
4524       return aff_combination_zero_p (&aff_e1);
4525
4526     case POINTER_PLUS_EXPR:
4527       aff_combination_add (&aff_e2, &aff_offset);
4528       return aff_combination_zero_p (&aff_e2);
4529
4530     default:
4531       return false;
4532     }
4533 }
4534
4535 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4536    comparison with CAND.  NITER describes the number of iterations of
4537    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4538
4539    We aim to handle the following situation:
4540
4541    sometype *base, *p;
4542    int a, b, i;
4543
4544    i = a;
4545    p = p_0 = base + a;
4546
4547    do
4548      {
4549        bla (*p);
4550        p++;
4551        i++;
4552      }
4553    while (i < b);
4554
4555    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4556    We aim to optimize this to
4557
4558    p = p_0 = base + a;
4559    do
4560      {
4561        bla (*p);
4562        p++;
4563      }
4564    while (p < p_0 - a + b);
4565
4566    This preserves the correctness, since the pointer arithmetics does not
4567    overflow.  More precisely:
4568
4569    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4570       overflow in computing it or the values of p.
4571    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4572       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4573
4574 static bool
4575 iv_elimination_compare_lt (struct ivopts_data *data,
4576                            struct iv_cand *cand, enum tree_code *comp_p,
4577                            struct tree_niter_desc *niter)
4578 {
4579   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4580   struct aff_tree nit, tmpa, tmpb;
4581   enum tree_code comp;
4582   HOST_WIDE_INT step;
4583
4584   /* We need to know that the candidate induction variable does not overflow.
4585      While more complex analysis may be used to prove this, for now just
4586      check that the variable appears in the original program and that it
4587      is computed in a type that guarantees no overflows.  */
4588   cand_type = TREE_TYPE (cand->iv->base);
4589   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4590     return false;
4591
4592   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4593      the calculation of the BOUND could overflow, making the comparison
4594      invalid.  */
4595   if (!data->loop_single_exit_p)
4596     return false;
4597
4598   /* We need to be able to decide whether candidate is increasing or decreasing
4599      in order to choose the right comparison operator.  */
4600   if (!cst_and_fits_in_hwi (cand->iv->step))
4601     return false;
4602   step = int_cst_value (cand->iv->step);
4603
4604   /* Check that the number of iterations matches the expected pattern:
4605      a + 1 > b ? 0 : b - a - 1.  */
4606   mbz = niter->may_be_zero;
4607   if (TREE_CODE (mbz) == GT_EXPR)
4608     {
4609       /* Handle a + 1 > b.  */
4610       tree op0 = TREE_OPERAND (mbz, 0);
4611       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4612         {
4613           a = TREE_OPERAND (op0, 0);
4614           b = TREE_OPERAND (mbz, 1);
4615         }
4616       else
4617         return false;
4618     }
4619   else if (TREE_CODE (mbz) == LT_EXPR)
4620     {
4621       tree op1 = TREE_OPERAND (mbz, 1);
4622
4623       /* Handle b < a + 1.  */
4624       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4625         {
4626           a = TREE_OPERAND (op1, 0);
4627           b = TREE_OPERAND (mbz, 0);
4628         }
4629       else
4630         return false;
4631     }
4632   else
4633     return false;
4634
4635   /* Expected number of iterations is B - A - 1.  Check that it matches
4636      the actual number, i.e., that B - A - NITER = 1.  */
4637   tree_to_aff_combination (niter->niter, nit_type, &nit);
4638   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4639   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4640   aff_combination_scale (&nit, -1);
4641   aff_combination_scale (&tmpa, -1);
4642   aff_combination_add (&tmpb, &tmpa);
4643   aff_combination_add (&tmpb, &nit);
4644   if (tmpb.n != 0 || tmpb.offset != 1)
4645     return false;
4646
4647   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4648      overflow.  */
4649   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4650                         cand->iv->step,
4651                         fold_convert (TREE_TYPE (cand->iv->step), a));
4652   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4653     return false;
4654
4655   /* Determine the new comparison operator.  */
4656   comp = step < 0 ? GT_EXPR : LT_EXPR;
4657   if (*comp_p == NE_EXPR)
4658     *comp_p = comp;
4659   else if (*comp_p == EQ_EXPR)
4660     *comp_p = invert_tree_comparison (comp, false);
4661   else
4662     gcc_unreachable ();
4663
4664   return true;
4665 }
4666
4667 /* Check whether it is possible to express the condition in USE by comparison
4668    of candidate CAND.  If so, store the value compared with to BOUND, and the
4669    comparison operator to COMP.  */
4670
4671 static bool
4672 may_eliminate_iv (struct ivopts_data *data,
4673                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4674                   enum tree_code *comp)
4675 {
4676   basic_block ex_bb;
4677   edge exit;
4678   tree period;
4679   struct loop *loop = data->current_loop;
4680   aff_tree bnd;
4681   struct tree_niter_desc *desc = NULL;
4682
4683   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4684     return false;
4685
4686   /* For now works only for exits that dominate the loop latch.
4687      TODO: extend to other conditions inside loop body.  */
4688   ex_bb = gimple_bb (use->stmt);
4689   if (use->stmt != last_stmt (ex_bb)
4690       || gimple_code (use->stmt) != GIMPLE_COND
4691       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4692     return false;
4693
4694   exit = EDGE_SUCC (ex_bb, 0);
4695   if (flow_bb_inside_loop_p (loop, exit->dest))
4696     exit = EDGE_SUCC (ex_bb, 1);
4697   if (flow_bb_inside_loop_p (loop, exit->dest))
4698     return false;
4699
4700   desc = niter_for_exit (data, exit);
4701   if (!desc)
4702     return false;
4703
4704   /* Determine whether we can use the variable to test the exit condition.
4705      This is the case iff the period of the induction variable is greater
4706      than the number of iterations for which the exit condition is true.  */
4707   period = iv_period (cand->iv);
4708
4709   /* If the number of iterations is constant, compare against it directly.  */
4710   if (TREE_CODE (desc->niter) == INTEGER_CST)
4711     {
4712       /* See cand_value_at.  */
4713       if (stmt_after_increment (loop, cand, use->stmt))
4714         {
4715           if (!tree_int_cst_lt (desc->niter, period))
4716             return false;
4717         }
4718       else
4719         {
4720           if (tree_int_cst_lt (period, desc->niter))
4721             return false;
4722         }
4723     }
4724
4725   /* If not, and if this is the only possible exit of the loop, see whether
4726      we can get a conservative estimate on the number of iterations of the
4727      entire loop and compare against that instead.  */
4728   else
4729     {
4730       widest_int period_value, max_niter;
4731
4732       max_niter = desc->max;
4733       if (stmt_after_increment (loop, cand, use->stmt))
4734         max_niter += 1;
4735       period_value = wi::to_widest (period);
4736       if (wi::gtu_p (max_niter, period_value))
4737         {
4738           /* See if we can take advantage of inferred loop bound information.  */
4739           if (data->loop_single_exit_p)
4740             {
4741               if (!max_loop_iterations (loop, &max_niter))
4742                 return false;
4743               /* The loop bound is already adjusted by adding 1.  */
4744               if (wi::gtu_p (max_niter, period_value))
4745                 return false;
4746             }
4747           else
4748             return false;
4749         }
4750     }
4751
4752   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4753
4754   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4755                          aff_combination_to_tree (&bnd));
4756   *comp = iv_elimination_compare (data, use);
4757
4758   /* It is unlikely that computing the number of iterations using division
4759      would be more profitable than keeping the original induction variable.  */
4760   if (expression_expensive_p (*bound))
4761     return false;
4762
4763   /* Sometimes, it is possible to handle the situation that the number of
4764      iterations may be zero unless additional assumtions by using <
4765      instead of != in the exit condition.
4766
4767      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4768            base the exit condition on it.  However, that is often too
4769            expensive.  */
4770   if (!integer_zerop (desc->may_be_zero))
4771     return iv_elimination_compare_lt (data, cand, comp, desc);
4772
4773   return true;
4774 }
4775
4776  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4777     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4778
4779 static int
4780 parm_decl_cost (struct ivopts_data *data, tree bound)
4781 {
4782   tree sbound = bound;
4783   STRIP_NOPS (sbound);
4784
4785   if (TREE_CODE (sbound) == SSA_NAME
4786       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4787       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4788       && data->body_includes_call)
4789     return COSTS_N_INSNS (1);
4790
4791   return 0;
4792 }
4793
4794 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4795
4796 static bool
4797 determine_use_iv_cost_condition (struct ivopts_data *data,
4798                                  struct iv_use *use, struct iv_cand *cand)
4799 {
4800   tree bound = NULL_TREE;
4801   struct iv *cmp_iv;
4802   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4803   comp_cost elim_cost, express_cost, cost, bound_cost;
4804   bool ok;
4805   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4806   tree *control_var, *bound_cst;
4807   enum tree_code comp = ERROR_MARK;
4808
4809   /* Only consider real candidates.  */
4810   if (!cand->iv)
4811     {
4812       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4813                        ERROR_MARK, -1);
4814       return false;
4815     }
4816
4817   /* Try iv elimination.  */
4818   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4819     {
4820       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4821       if (elim_cost.cost == 0)
4822         elim_cost.cost = parm_decl_cost (data, bound);
4823       else if (TREE_CODE (bound) == INTEGER_CST)
4824         elim_cost.cost = 0;
4825       /* If we replace a loop condition 'i < n' with 'p < base + n',
4826          depends_on_elim will have 'base' and 'n' set, which implies
4827          that both 'base' and 'n' will be live during the loop.  More likely,
4828          'base + n' will be loop invariant, resulting in only one live value
4829          during the loop.  So in that case we clear depends_on_elim and set
4830         elim_inv_expr_id instead.  */
4831       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4832         {
4833           elim_inv_expr_id = get_expr_id (data, bound);
4834           bitmap_clear (depends_on_elim);
4835         }
4836       /* The bound is a loop invariant, so it will be only computed
4837          once.  */
4838       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4839     }
4840   else
4841     elim_cost = infinite_cost;
4842
4843   /* Try expressing the original giv.  If it is compared with an invariant,
4844      note that we cannot get rid of it.  */
4845   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4846                               NULL, &cmp_iv);
4847   gcc_assert (ok);
4848
4849   /* When the condition is a comparison of the candidate IV against
4850      zero, prefer this IV.
4851
4852      TODO: The constant that we're subtracting from the cost should
4853      be target-dependent.  This information should be added to the
4854      target costs for each backend.  */
4855   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4856       && integer_zerop (*bound_cst)
4857       && (operand_equal_p (*control_var, cand->var_after, 0)
4858           || operand_equal_p (*control_var, cand->var_before, 0)))
4859     elim_cost.cost -= 1;
4860
4861   express_cost = get_computation_cost (data, use, cand, false,
4862                                        &depends_on_express, NULL,
4863                                        &express_inv_expr_id);
4864   fd_ivopts_data = data;
4865   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4866
4867   /* Count the cost of the original bound as well.  */
4868   bound_cost = force_var_cost (data, *bound_cst, NULL);
4869   if (bound_cost.cost == 0)
4870     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4871   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4872     bound_cost.cost = 0;
4873   express_cost.cost += bound_cost.cost;
4874
4875   /* Choose the better approach, preferring the eliminated IV. */
4876   if (compare_costs (elim_cost, express_cost) <= 0)
4877     {
4878       cost = elim_cost;
4879       depends_on = depends_on_elim;
4880       depends_on_elim = NULL;
4881       inv_expr_id = elim_inv_expr_id;
4882     }
4883   else
4884     {
4885       cost = express_cost;
4886       depends_on = depends_on_express;
4887       depends_on_express = NULL;
4888       bound = NULL_TREE;
4889       comp = ERROR_MARK;
4890       inv_expr_id = express_inv_expr_id;
4891     }
4892
4893   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4894
4895   if (depends_on_elim)
4896     BITMAP_FREE (depends_on_elim);
4897   if (depends_on_express)
4898     BITMAP_FREE (depends_on_express);
4899
4900   return !infinite_cost_p (cost);
4901 }
4902
4903 /* Determines cost of basing replacement of USE on CAND.  Returns false
4904    if USE cannot be based on CAND.  */
4905
4906 static bool
4907 determine_use_iv_cost (struct ivopts_data *data,
4908                        struct iv_use *use, struct iv_cand *cand)
4909 {
4910   switch (use->type)
4911     {
4912     case USE_NONLINEAR_EXPR:
4913       return determine_use_iv_cost_generic (data, use, cand);
4914
4915     case USE_ADDRESS:
4916       return determine_use_iv_cost_address (data, use, cand);
4917
4918     case USE_COMPARE:
4919       return determine_use_iv_cost_condition (data, use, cand);
4920
4921     default:
4922       gcc_unreachable ();
4923     }
4924 }
4925
4926 /* Return true if get_computation_cost indicates that autoincrement is
4927    a possibility for the pair of USE and CAND, false otherwise.  */
4928
4929 static bool
4930 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4931                            struct iv_cand *cand)
4932 {
4933   bitmap depends_on;
4934   bool can_autoinc;
4935   comp_cost cost;
4936
4937   if (use->type != USE_ADDRESS)
4938     return false;
4939
4940   cost = get_computation_cost (data, use, cand, true, &depends_on,
4941                                &can_autoinc, NULL);
4942
4943   BITMAP_FREE (depends_on);
4944
4945   return !infinite_cost_p (cost) && can_autoinc;
4946 }
4947
4948 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4949    use that allows autoincrement, and set their AINC_USE if possible.  */
4950
4951 static void
4952 set_autoinc_for_original_candidates (struct ivopts_data *data)
4953 {
4954   unsigned i, j;
4955
4956   for (i = 0; i < n_iv_cands (data); i++)
4957     {
4958       struct iv_cand *cand = iv_cand (data, i);
4959       struct iv_use *closest_before = NULL;
4960       struct iv_use *closest_after = NULL;
4961       if (cand->pos != IP_ORIGINAL)
4962         continue;
4963
4964       for (j = 0; j < n_iv_uses (data); j++)
4965         {
4966           struct iv_use *use = iv_use (data, j);
4967           unsigned uid = gimple_uid (use->stmt);
4968
4969           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4970             continue;
4971
4972           if (uid < gimple_uid (cand->incremented_at)
4973               && (closest_before == NULL
4974                   || uid > gimple_uid (closest_before->stmt)))
4975             closest_before = use;
4976
4977           if (uid > gimple_uid (cand->incremented_at)
4978               && (closest_after == NULL
4979                   || uid < gimple_uid (closest_after->stmt)))
4980             closest_after = use;
4981         }
4982
4983       if (closest_before != NULL
4984           && autoinc_possible_for_pair (data, closest_before, cand))
4985         cand->ainc_use = closest_before;
4986       else if (closest_after != NULL
4987                && autoinc_possible_for_pair (data, closest_after, cand))
4988         cand->ainc_use = closest_after;
4989     }
4990 }
4991
4992 /* Finds the candidates for the induction variables.  */
4993
4994 static void
4995 find_iv_candidates (struct ivopts_data *data)
4996 {
4997   /* Add commonly used ivs.  */
4998   add_standard_iv_candidates (data);
4999
5000   /* Add old induction variables.  */
5001   add_old_ivs_candidates (data);
5002
5003   /* Add induction variables derived from uses.  */
5004   add_derived_ivs_candidates (data);
5005
5006   set_autoinc_for_original_candidates (data);
5007
5008   /* Record the important candidates.  */
5009   record_important_candidates (data);
5010 }
5011
5012 /* Determines costs of basing the use of the iv on an iv candidate.  */
5013
5014 static void
5015 determine_use_iv_costs (struct ivopts_data *data)
5016 {
5017   unsigned i, j;
5018   struct iv_use *use;
5019   struct iv_cand *cand;
5020   bitmap to_clear = BITMAP_ALLOC (NULL);
5021
5022   alloc_use_cost_map (data);
5023
5024   for (i = 0; i < n_iv_uses (data); i++)
5025     {
5026       use = iv_use (data, i);
5027
5028       if (data->consider_all_candidates)
5029         {
5030           for (j = 0; j < n_iv_cands (data); j++)
5031             {
5032               cand = iv_cand (data, j);
5033               determine_use_iv_cost (data, use, cand);
5034             }
5035         }
5036       else
5037         {
5038           bitmap_iterator bi;
5039
5040           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5041             {
5042               cand = iv_cand (data, j);
5043               if (!determine_use_iv_cost (data, use, cand))
5044                 bitmap_set_bit (to_clear, j);
5045             }
5046
5047           /* Remove the candidates for that the cost is infinite from
5048              the list of related candidates.  */
5049           bitmap_and_compl_into (use->related_cands, to_clear);
5050           bitmap_clear (to_clear);
5051         }
5052     }
5053
5054   BITMAP_FREE (to_clear);
5055
5056   if (dump_file && (dump_flags & TDF_DETAILS))
5057     {
5058       fprintf (dump_file, "Use-candidate costs:\n");
5059
5060       for (i = 0; i < n_iv_uses (data); i++)
5061         {
5062           use = iv_use (data, i);
5063
5064           fprintf (dump_file, "Use %d:\n", i);
5065           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5066           for (j = 0; j < use->n_map_members; j++)
5067             {
5068               if (!use->cost_map[j].cand
5069                   || infinite_cost_p (use->cost_map[j].cost))
5070                 continue;
5071
5072               fprintf (dump_file, "  %d\t%d\t%d\t",
5073                        use->cost_map[j].cand->id,
5074                        use->cost_map[j].cost.cost,
5075                        use->cost_map[j].cost.complexity);
5076               if (use->cost_map[j].depends_on)
5077                 bitmap_print (dump_file,
5078                               use->cost_map[j].depends_on, "","");
5079               if (use->cost_map[j].inv_expr_id != -1)
5080                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5081               fprintf (dump_file, "\n");
5082             }
5083
5084           fprintf (dump_file, "\n");
5085         }
5086       fprintf (dump_file, "\n");
5087     }
5088 }
5089
5090 /* Determines cost of the candidate CAND.  */
5091
5092 static void
5093 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5094 {
5095   comp_cost cost_base;
5096   unsigned cost, cost_step;
5097   tree base;
5098
5099   if (!cand->iv)
5100     {
5101       cand->cost = 0;
5102       return;
5103     }
5104
5105   /* There are two costs associated with the candidate -- its increment
5106      and its initialization.  The second is almost negligible for any loop
5107      that rolls enough, so we take it just very little into account.  */
5108
5109   base = cand->iv->base;
5110   cost_base = force_var_cost (data, base, NULL);
5111   /* It will be exceptional that the iv register happens to be initialized with
5112      the proper value at no cost.  In general, there will at least be a regcopy
5113      or a const set.  */
5114   if (cost_base.cost == 0)
5115     cost_base.cost = COSTS_N_INSNS (1);
5116   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5117
5118   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5119
5120   /* Prefer the original ivs unless we may gain something by replacing it.
5121      The reason is to make debugging simpler; so this is not relevant for
5122      artificial ivs created by other optimization passes.  */
5123   if (cand->pos != IP_ORIGINAL
5124       || !SSA_NAME_VAR (cand->var_before)
5125       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5126     cost++;
5127
5128   /* Prefer not to insert statements into latch unless there are some
5129      already (so that we do not create unnecessary jumps).  */
5130   if (cand->pos == IP_END
5131       && empty_block_p (ip_end_pos (data->current_loop)))
5132     cost++;
5133
5134   cand->cost = cost;
5135   cand->cost_step = cost_step;
5136 }
5137
5138 /* Determines costs of computation of the candidates.  */
5139
5140 static void
5141 determine_iv_costs (struct ivopts_data *data)
5142 {
5143   unsigned i;
5144
5145   if (dump_file && (dump_flags & TDF_DETAILS))
5146     {
5147       fprintf (dump_file, "Candidate costs:\n");
5148       fprintf (dump_file, "  cand\tcost\n");
5149     }
5150
5151   for (i = 0; i < n_iv_cands (data); i++)
5152     {
5153       struct iv_cand *cand = iv_cand (data, i);
5154
5155       determine_iv_cost (data, cand);
5156
5157       if (dump_file && (dump_flags & TDF_DETAILS))
5158         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5159     }
5160
5161   if (dump_file && (dump_flags & TDF_DETAILS))
5162     fprintf (dump_file, "\n");
5163 }
5164
5165 /* Calculates cost for having SIZE induction variables.  */
5166
5167 static unsigned
5168 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5169 {
5170   /* We add size to the cost, so that we prefer eliminating ivs
5171      if possible.  */
5172   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5173                                             data->body_includes_call);
5174 }
5175
5176 /* For each size of the induction variable set determine the penalty.  */
5177
5178 static void
5179 determine_set_costs (struct ivopts_data *data)
5180 {
5181   unsigned j, n;
5182   gphi *phi;
5183   gphi_iterator psi;
5184   tree op;
5185   struct loop *loop = data->current_loop;
5186   bitmap_iterator bi;
5187
5188   if (dump_file && (dump_flags & TDF_DETAILS))
5189     {
5190       fprintf (dump_file, "Global costs:\n");
5191       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5192       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5193       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5194       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5195     }
5196
5197   n = 0;
5198   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5199     {
5200       phi = psi.phi ();
5201       op = PHI_RESULT (phi);
5202
5203       if (virtual_operand_p (op))
5204         continue;
5205
5206       if (get_iv (data, op))
5207         continue;
5208
5209       n++;
5210     }
5211
5212   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5213     {
5214       struct version_info *info = ver_info (data, j);
5215
5216       if (info->inv_id && info->has_nonlin_use)
5217         n++;
5218     }
5219
5220   data->regs_used = n;
5221   if (dump_file && (dump_flags & TDF_DETAILS))
5222     fprintf (dump_file, "  regs_used %d\n", n);
5223
5224   if (dump_file && (dump_flags & TDF_DETAILS))
5225     {
5226       fprintf (dump_file, "  cost for size:\n");
5227       fprintf (dump_file, "  ivs\tcost\n");
5228       for (j = 0; j <= 2 * target_avail_regs; j++)
5229         fprintf (dump_file, "  %d\t%d\n", j,
5230                  ivopts_global_cost_for_size (data, j));
5231       fprintf (dump_file, "\n");
5232     }
5233 }
5234
5235 /* Returns true if A is a cheaper cost pair than B.  */
5236
5237 static bool
5238 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5239 {
5240   int cmp;
5241
5242   if (!a)
5243     return false;
5244
5245   if (!b)
5246     return true;
5247
5248   cmp = compare_costs (a->cost, b->cost);
5249   if (cmp < 0)
5250     return true;
5251
5252   if (cmp > 0)
5253     return false;
5254
5255   /* In case the costs are the same, prefer the cheaper candidate.  */
5256   if (a->cand->cost < b->cand->cost)
5257     return true;
5258
5259   return false;
5260 }
5261
5262
5263 /* Returns candidate by that USE is expressed in IVS.  */
5264
5265 static struct cost_pair *
5266 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5267 {
5268   return ivs->cand_for_use[use->id];
5269 }
5270
5271 /* Computes the cost field of IVS structure.  */
5272
5273 static void
5274 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5275 {
5276   comp_cost cost = ivs->cand_use_cost;
5277
5278   cost.cost += ivs->cand_cost;
5279
5280   cost.cost += ivopts_global_cost_for_size (data,
5281                                             ivs->n_regs + ivs->num_used_inv_expr);
5282
5283   ivs->cost = cost;
5284 }
5285
5286 /* Remove invariants in set INVS to set IVS.  */
5287
5288 static void
5289 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5290 {
5291   bitmap_iterator bi;
5292   unsigned iid;
5293
5294   if (!invs)
5295     return;
5296
5297   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5298     {
5299       ivs->n_invariant_uses[iid]--;
5300       if (ivs->n_invariant_uses[iid] == 0)
5301         ivs->n_regs--;
5302     }
5303 }
5304
5305 /* Set USE not to be expressed by any candidate in IVS.  */
5306
5307 static void
5308 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5309                  struct iv_use *use)
5310 {
5311   unsigned uid = use->id, cid;
5312   struct cost_pair *cp;
5313
5314   cp = ivs->cand_for_use[uid];
5315   if (!cp)
5316     return;
5317   cid = cp->cand->id;
5318
5319   ivs->bad_uses++;
5320   ivs->cand_for_use[uid] = NULL;
5321   ivs->n_cand_uses[cid]--;
5322
5323   if (ivs->n_cand_uses[cid] == 0)
5324     {
5325       bitmap_clear_bit (ivs->cands, cid);
5326       /* Do not count the pseudocandidates.  */
5327       if (cp->cand->iv)
5328         ivs->n_regs--;
5329       ivs->n_cands--;
5330       ivs->cand_cost -= cp->cand->cost;
5331
5332       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5333     }
5334
5335   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5336
5337   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5338
5339   if (cp->inv_expr_id != -1)
5340     {
5341       ivs->used_inv_expr[cp->inv_expr_id]--;
5342       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5343         ivs->num_used_inv_expr--;
5344     }
5345   iv_ca_recount_cost (data, ivs);
5346 }
5347
5348 /* Add invariants in set INVS to set IVS.  */
5349
5350 static void
5351 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5352 {
5353   bitmap_iterator bi;
5354   unsigned iid;
5355
5356   if (!invs)
5357     return;
5358
5359   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5360     {
5361       ivs->n_invariant_uses[iid]++;
5362       if (ivs->n_invariant_uses[iid] == 1)
5363         ivs->n_regs++;
5364     }
5365 }
5366
5367 /* Set cost pair for USE in set IVS to CP.  */
5368
5369 static void
5370 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5371               struct iv_use *use, struct cost_pair *cp)
5372 {
5373   unsigned uid = use->id, cid;
5374
5375   if (ivs->cand_for_use[uid] == cp)
5376     return;
5377
5378   if (ivs->cand_for_use[uid])
5379     iv_ca_set_no_cp (data, ivs, use);
5380
5381   if (cp)
5382     {
5383       cid = cp->cand->id;
5384
5385       ivs->bad_uses--;
5386       ivs->cand_for_use[uid] = cp;
5387       ivs->n_cand_uses[cid]++;
5388       if (ivs->n_cand_uses[cid] == 1)
5389         {
5390           bitmap_set_bit (ivs->cands, cid);
5391           /* Do not count the pseudocandidates.  */
5392           if (cp->cand->iv)
5393             ivs->n_regs++;
5394           ivs->n_cands++;
5395           ivs->cand_cost += cp->cand->cost;
5396
5397           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5398         }
5399
5400       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5401       iv_ca_set_add_invariants (ivs, cp->depends_on);
5402
5403       if (cp->inv_expr_id != -1)
5404         {
5405           ivs->used_inv_expr[cp->inv_expr_id]++;
5406           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5407             ivs->num_used_inv_expr++;
5408         }
5409       iv_ca_recount_cost (data, ivs);
5410     }
5411 }
5412
5413 /* Extend set IVS by expressing USE by some of the candidates in it
5414    if possible.  Consider all important candidates if candidates in
5415    set IVS don't give any result.  */
5416
5417 static void
5418 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5419                struct iv_use *use)
5420 {
5421   struct cost_pair *best_cp = NULL, *cp;
5422   bitmap_iterator bi;
5423   unsigned i;
5424   struct iv_cand *cand;
5425
5426   gcc_assert (ivs->upto >= use->id);
5427   ivs->upto++;
5428   ivs->bad_uses++;
5429
5430   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5431     {
5432       cand = iv_cand (data, i);
5433       cp = get_use_iv_cost (data, use, cand);
5434       if (cheaper_cost_pair (cp, best_cp))
5435         best_cp = cp;
5436     }
5437
5438   if (best_cp == NULL)
5439     {
5440       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5441         {
5442           cand = iv_cand (data, i);
5443           cp = get_use_iv_cost (data, use, cand);
5444           if (cheaper_cost_pair (cp, best_cp))
5445             best_cp = cp;
5446         }
5447     }
5448
5449   iv_ca_set_cp (data, ivs, use, best_cp);
5450 }
5451
5452 /* Get cost for assignment IVS.  */
5453
5454 static comp_cost
5455 iv_ca_cost (struct iv_ca *ivs)
5456 {
5457   /* This was a conditional expression but it triggered a bug in
5458      Sun C 5.5.  */
5459   if (ivs->bad_uses)
5460     return infinite_cost;
5461   else
5462     return ivs->cost;
5463 }
5464
5465 /* Returns true if all dependences of CP are among invariants in IVS.  */
5466
5467 static bool
5468 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5469 {
5470   unsigned i;
5471   bitmap_iterator bi;
5472
5473   if (!cp->depends_on)
5474     return true;
5475
5476   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5477     {
5478       if (ivs->n_invariant_uses[i] == 0)
5479         return false;
5480     }
5481
5482   return true;
5483 }
5484
5485 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5486    it before NEXT_CHANGE.  */
5487
5488 static struct iv_ca_delta *
5489 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5490                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5491 {
5492   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5493
5494   change->use = use;
5495   change->old_cp = old_cp;
5496   change->new_cp = new_cp;
5497   change->next_change = next_change;
5498
5499   return change;
5500 }
5501
5502 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5503    are rewritten.  */
5504
5505 static struct iv_ca_delta *
5506 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5507 {
5508   struct iv_ca_delta *last;
5509
5510   if (!l2)
5511     return l1;
5512
5513   if (!l1)
5514     return l2;
5515
5516   for (last = l1; last->next_change; last = last->next_change)
5517     continue;
5518   last->next_change = l2;
5519
5520   return l1;
5521 }
5522
5523 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5524
5525 static struct iv_ca_delta *
5526 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5527 {
5528   struct iv_ca_delta *act, *next, *prev = NULL;
5529   struct cost_pair *tmp;
5530
5531   for (act = delta; act; act = next)
5532     {
5533       next = act->next_change;
5534       act->next_change = prev;
5535       prev = act;
5536
5537       tmp = act->old_cp;
5538       act->old_cp = act->new_cp;
5539       act->new_cp = tmp;
5540     }
5541
5542   return prev;
5543 }
5544
5545 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5546    reverted instead.  */
5547
5548 static void
5549 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5550                     struct iv_ca_delta *delta, bool forward)
5551 {
5552   struct cost_pair *from, *to;
5553   struct iv_ca_delta *act;
5554
5555   if (!forward)
5556     delta = iv_ca_delta_reverse (delta);
5557
5558   for (act = delta; act; act = act->next_change)
5559     {
5560       from = act->old_cp;
5561       to = act->new_cp;
5562       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5563       iv_ca_set_cp (data, ivs, act->use, to);
5564     }
5565
5566   if (!forward)
5567     iv_ca_delta_reverse (delta);
5568 }
5569
5570 /* Returns true if CAND is used in IVS.  */
5571
5572 static bool
5573 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5574 {
5575   return ivs->n_cand_uses[cand->id] > 0;
5576 }
5577
5578 /* Returns number of induction variable candidates in the set IVS.  */
5579
5580 static unsigned
5581 iv_ca_n_cands (struct iv_ca *ivs)
5582 {
5583   return ivs->n_cands;
5584 }
5585
5586 /* Free the list of changes DELTA.  */
5587
5588 static void
5589 iv_ca_delta_free (struct iv_ca_delta **delta)
5590 {
5591   struct iv_ca_delta *act, *next;
5592
5593   for (act = *delta; act; act = next)
5594     {
5595       next = act->next_change;
5596       free (act);
5597     }
5598
5599   *delta = NULL;
5600 }
5601
5602 /* Allocates new iv candidates assignment.  */
5603
5604 static struct iv_ca *
5605 iv_ca_new (struct ivopts_data *data)
5606 {
5607   struct iv_ca *nw = XNEW (struct iv_ca);
5608
5609   nw->upto = 0;
5610   nw->bad_uses = 0;
5611   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5612   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5613   nw->cands = BITMAP_ALLOC (NULL);
5614   nw->n_cands = 0;
5615   nw->n_regs = 0;
5616   nw->cand_use_cost = no_cost;
5617   nw->cand_cost = 0;
5618   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5619   nw->cost = no_cost;
5620   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5621   nw->num_used_inv_expr = 0;
5622
5623   return nw;
5624 }
5625
5626 /* Free memory occupied by the set IVS.  */
5627
5628 static void
5629 iv_ca_free (struct iv_ca **ivs)
5630 {
5631   free ((*ivs)->cand_for_use);
5632   free ((*ivs)->n_cand_uses);
5633   BITMAP_FREE ((*ivs)->cands);
5634   free ((*ivs)->n_invariant_uses);
5635   free ((*ivs)->used_inv_expr);
5636   free (*ivs);
5637   *ivs = NULL;
5638 }
5639
5640 /* Dumps IVS to FILE.  */
5641
5642 static void
5643 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5644 {
5645   const char *pref = "  invariants ";
5646   unsigned i;
5647   comp_cost cost = iv_ca_cost (ivs);
5648
5649   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5650   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5651            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5652   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5653
5654    for (i = 0; i < ivs->upto; i++)
5655     {
5656       struct iv_use *use = iv_use (data, i);
5657       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5658       if (cp)
5659         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5660                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5661       else
5662         fprintf (file, "   use:%d --> ??\n", use->id);
5663     }
5664
5665   for (i = 1; i <= data->max_inv_id; i++)
5666     if (ivs->n_invariant_uses[i])
5667       {
5668         fprintf (file, "%s%d", pref, i);
5669         pref = ", ";
5670       }
5671   fprintf (file, "\n\n");
5672 }
5673
5674 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5675    new set, and store differences in DELTA.  Number of induction variables
5676    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5677    the function will try to find a solution with mimimal iv candidates.  */
5678
5679 static comp_cost
5680 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5681               struct iv_cand *cand, struct iv_ca_delta **delta,
5682               unsigned *n_ivs, bool min_ncand)
5683 {
5684   unsigned i;
5685   comp_cost cost;
5686   struct iv_use *use;
5687   struct cost_pair *old_cp, *new_cp;
5688
5689   *delta = NULL;
5690   for (i = 0; i < ivs->upto; i++)
5691     {
5692       use = iv_use (data, i);
5693       old_cp = iv_ca_cand_for_use (ivs, use);
5694
5695       if (old_cp
5696           && old_cp->cand == cand)
5697         continue;
5698
5699       new_cp = get_use_iv_cost (data, use, cand);
5700       if (!new_cp)
5701         continue;
5702
5703       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5704         continue;
5705
5706       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5707         continue;
5708
5709       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5710     }
5711
5712   iv_ca_delta_commit (data, ivs, *delta, true);
5713   cost = iv_ca_cost (ivs);
5714   if (n_ivs)
5715     *n_ivs = iv_ca_n_cands (ivs);
5716   iv_ca_delta_commit (data, ivs, *delta, false);
5717
5718   return cost;
5719 }
5720
5721 /* Try narrowing set IVS by removing CAND.  Return the cost of
5722    the new set and store the differences in DELTA.  START is
5723    the candidate with which we start narrowing.  */
5724
5725 static comp_cost
5726 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5727               struct iv_cand *cand, struct iv_cand *start,
5728               struct iv_ca_delta **delta)
5729 {
5730   unsigned i, ci;
5731   struct iv_use *use;
5732   struct cost_pair *old_cp, *new_cp, *cp;
5733   bitmap_iterator bi;
5734   struct iv_cand *cnd;
5735   comp_cost cost, best_cost, acost;
5736
5737   *delta = NULL;
5738   for (i = 0; i < n_iv_uses (data); i++)
5739     {
5740       use = iv_use (data, i);
5741
5742       old_cp = iv_ca_cand_for_use (ivs, use);
5743       if (old_cp->cand != cand)
5744         continue;
5745
5746       best_cost = iv_ca_cost (ivs);
5747       /* Start narrowing with START.  */
5748       new_cp = get_use_iv_cost (data, use, start);
5749
5750       if (data->consider_all_candidates)
5751         {
5752           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5753             {
5754               if (ci == cand->id || (start && ci == start->id))
5755                 continue;
5756
5757               cnd = iv_cand (data, ci);
5758
5759               cp = get_use_iv_cost (data, use, cnd);
5760               if (!cp)
5761                 continue;
5762
5763               iv_ca_set_cp (data, ivs, use, cp);
5764               acost = iv_ca_cost (ivs);
5765
5766               if (compare_costs (acost, best_cost) < 0)
5767                 {
5768                   best_cost = acost;
5769                   new_cp = cp;
5770                 }
5771             }
5772         }
5773       else
5774         {
5775           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5776             {
5777               if (ci == cand->id || (start && ci == start->id))
5778                 continue;
5779
5780               cnd = iv_cand (data, ci);
5781
5782               cp = get_use_iv_cost (data, use, cnd);
5783               if (!cp)
5784                 continue;
5785
5786               iv_ca_set_cp (data, ivs, use, cp);
5787               acost = iv_ca_cost (ivs);
5788
5789               if (compare_costs (acost, best_cost) < 0)
5790                 {
5791                   best_cost = acost;
5792                   new_cp = cp;
5793                 }
5794             }
5795         }
5796       /* Restore to old cp for use.  */
5797       iv_ca_set_cp (data, ivs, use, old_cp);
5798
5799       if (!new_cp)
5800         {
5801           iv_ca_delta_free (delta);
5802           return infinite_cost;
5803         }
5804
5805       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5806     }
5807
5808   iv_ca_delta_commit (data, ivs, *delta, true);
5809   cost = iv_ca_cost (ivs);
5810   iv_ca_delta_commit (data, ivs, *delta, false);
5811
5812   return cost;
5813 }
5814
5815 /* Try optimizing the set of candidates IVS by removing candidates different
5816    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5817    differences in DELTA.  */
5818
5819 static comp_cost
5820 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5821              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5822 {
5823   bitmap_iterator bi;
5824   struct iv_ca_delta *act_delta, *best_delta;
5825   unsigned i;
5826   comp_cost best_cost, acost;
5827   struct iv_cand *cand;
5828
5829   best_delta = NULL;
5830   best_cost = iv_ca_cost (ivs);
5831
5832   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5833     {
5834       cand = iv_cand (data, i);
5835
5836       if (cand == except_cand)
5837         continue;
5838
5839       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5840
5841       if (compare_costs (acost, best_cost) < 0)
5842         {
5843           best_cost = acost;
5844           iv_ca_delta_free (&best_delta);
5845           best_delta = act_delta;
5846         }
5847       else
5848         iv_ca_delta_free (&act_delta);
5849     }
5850
5851   if (!best_delta)
5852     {
5853       *delta = NULL;
5854       return best_cost;
5855     }
5856
5857   /* Recurse to possibly remove other unnecessary ivs.  */
5858   iv_ca_delta_commit (data, ivs, best_delta, true);
5859   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5860   iv_ca_delta_commit (data, ivs, best_delta, false);
5861   *delta = iv_ca_delta_join (best_delta, *delta);
5862   return best_cost;
5863 }
5864
5865 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5866    cheaper local cost for USE than BEST_CP.  Return pointer to
5867    the corresponding cost_pair, otherwise just return BEST_CP.  */
5868
5869 static struct cost_pair*
5870 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5871                         unsigned int cand_idx, struct iv_cand *old_cand,
5872                         struct cost_pair *best_cp)
5873 {
5874   struct iv_cand *cand;
5875   struct cost_pair *cp;
5876
5877   gcc_assert (old_cand != NULL && best_cp != NULL);
5878   if (cand_idx == old_cand->id)
5879     return best_cp;
5880
5881   cand = iv_cand (data, cand_idx);
5882   cp = get_use_iv_cost (data, use, cand);
5883   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5884     return cp;
5885
5886   return best_cp;
5887 }
5888
5889 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5890    which are used by more than one iv uses.  For each of those candidates,
5891    this function tries to represent iv uses under that candidate using
5892    other ones with lower local cost, then tries to prune the new set.
5893    If the new set has lower cost, It returns the new cost after recording
5894    candidate replacement in list DELTA.  */
5895
5896 static comp_cost
5897 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5898                struct iv_ca_delta **delta)
5899 {
5900   bitmap_iterator bi, bj;
5901   unsigned int i, j, k;
5902   struct iv_use *use;
5903   struct iv_cand *cand;
5904   comp_cost orig_cost, acost;
5905   struct iv_ca_delta *act_delta, *tmp_delta;
5906   struct cost_pair *old_cp, *best_cp = NULL;
5907
5908   *delta = NULL;
5909   orig_cost = iv_ca_cost (ivs);
5910
5911   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5912     {
5913       if (ivs->n_cand_uses[i] == 1
5914           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5915         continue;
5916
5917       cand = iv_cand (data, i);
5918
5919       act_delta = NULL;
5920       /*  Represent uses under current candidate using other ones with
5921           lower local cost.  */
5922       for (j = 0; j < ivs->upto; j++)
5923         {
5924           use = iv_use (data, j);
5925           old_cp = iv_ca_cand_for_use (ivs, use);
5926
5927           if (old_cp->cand != cand)
5928             continue;
5929
5930           best_cp = old_cp;
5931           if (data->consider_all_candidates)
5932             for (k = 0; k < n_iv_cands (data); k++)
5933               best_cp = cheaper_cost_with_cand (data, use, k,
5934                                                 old_cp->cand, best_cp);
5935           else
5936             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5937               best_cp = cheaper_cost_with_cand (data, use, k,
5938                                                 old_cp->cand, best_cp);
5939
5940           if (best_cp == old_cp)
5941             continue;
5942
5943           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
5944         }
5945       /* No need for further prune.  */
5946       if (!act_delta)
5947         continue;
5948
5949       /* Prune the new candidate set.  */
5950       iv_ca_delta_commit (data, ivs, act_delta, true);
5951       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
5952       iv_ca_delta_commit (data, ivs, act_delta, false);
5953       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5954
5955       if (compare_costs (acost, orig_cost) < 0)
5956         {
5957           *delta = act_delta;
5958           return acost;
5959         }
5960       else
5961         iv_ca_delta_free (&act_delta);
5962     }
5963
5964   return orig_cost;
5965 }
5966
5967 /* Tries to extend the sets IVS in the best possible way in order
5968    to express the USE.  If ORIGINALP is true, prefer candidates from
5969    the original set of IVs, otherwise favor important candidates not
5970    based on any memory object.  */
5971
5972 static bool
5973 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5974                   struct iv_use *use, bool originalp)
5975 {
5976   comp_cost best_cost, act_cost;
5977   unsigned i;
5978   bitmap_iterator bi;
5979   struct iv_cand *cand;
5980   struct iv_ca_delta *best_delta = NULL, *act_delta;
5981   struct cost_pair *cp;
5982
5983   iv_ca_add_use (data, ivs, use);
5984   best_cost = iv_ca_cost (ivs);
5985   cp = iv_ca_cand_for_use (ivs, use);
5986   if (cp)
5987     {
5988       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5989       iv_ca_set_no_cp (data, ivs, use);
5990     }
5991
5992   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5993      first try important candidates not based on any memory object.  Only if
5994      this fails, try the specific ones.  Rationale -- in loops with many
5995      variables the best choice often is to use just one generic biv.  If we
5996      added here many ivs specific to the uses, the optimization algorithm later
5997      would be likely to get stuck in a local minimum, thus causing us to create
5998      too many ivs.  The approach from few ivs to more seems more likely to be
5999      successful -- starting from few ivs, replacing an expensive use by a
6000      specific iv should always be a win.  */
6001   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6002     {
6003       cand = iv_cand (data, i);
6004
6005       if (originalp && cand->pos !=IP_ORIGINAL)
6006         continue;
6007
6008       if (!originalp && cand->iv->base_object != NULL_TREE)
6009         continue;
6010
6011       if (iv_ca_cand_used_p (ivs, cand))
6012         continue;
6013
6014       cp = get_use_iv_cost (data, use, cand);
6015       if (!cp)
6016         continue;
6017
6018       iv_ca_set_cp (data, ivs, use, cp);
6019       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6020                                true);
6021       iv_ca_set_no_cp (data, ivs, use);
6022       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6023
6024       if (compare_costs (act_cost, best_cost) < 0)
6025         {
6026           best_cost = act_cost;
6027
6028           iv_ca_delta_free (&best_delta);
6029           best_delta = act_delta;
6030         }
6031       else
6032         iv_ca_delta_free (&act_delta);
6033     }
6034
6035   if (infinite_cost_p (best_cost))
6036     {
6037       for (i = 0; i < use->n_map_members; i++)
6038         {
6039           cp = use->cost_map + i;
6040           cand = cp->cand;
6041           if (!cand)
6042             continue;
6043
6044           /* Already tried this.  */
6045           if (cand->important)
6046             {
6047               if (originalp && cand->pos == IP_ORIGINAL)
6048                 continue;
6049               if (!originalp && cand->iv->base_object == NULL_TREE)
6050                 continue;
6051             }
6052
6053           if (iv_ca_cand_used_p (ivs, cand))
6054             continue;
6055
6056           act_delta = NULL;
6057           iv_ca_set_cp (data, ivs, use, cp);
6058           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6059           iv_ca_set_no_cp (data, ivs, use);
6060           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6061                                        cp, act_delta);
6062
6063           if (compare_costs (act_cost, best_cost) < 0)
6064             {
6065               best_cost = act_cost;
6066
6067               if (best_delta)
6068                 iv_ca_delta_free (&best_delta);
6069               best_delta = act_delta;
6070             }
6071           else
6072             iv_ca_delta_free (&act_delta);
6073         }
6074     }
6075
6076   iv_ca_delta_commit (data, ivs, best_delta, true);
6077   iv_ca_delta_free (&best_delta);
6078
6079   return !infinite_cost_p (best_cost);
6080 }
6081
6082 /* Finds an initial assignment of candidates to uses.  */
6083
6084 static struct iv_ca *
6085 get_initial_solution (struct ivopts_data *data, bool originalp)
6086 {
6087   struct iv_ca *ivs = iv_ca_new (data);
6088   unsigned i;
6089
6090   for (i = 0; i < n_iv_uses (data); i++)
6091     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6092       {
6093         iv_ca_free (&ivs);
6094         return NULL;
6095       }
6096
6097   return ivs;
6098 }
6099
6100 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6101    points to a bool variable, this function tries to break local
6102    optimal fixed-point by replacing candidates in IVS if it's true.  */
6103
6104 static bool
6105 try_improve_iv_set (struct ivopts_data *data,
6106                     struct iv_ca *ivs, bool *try_replace_p)
6107 {
6108   unsigned i, n_ivs;
6109   comp_cost acost, best_cost = iv_ca_cost (ivs);
6110   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6111   struct iv_cand *cand;
6112
6113   /* Try extending the set of induction variables by one.  */
6114   for (i = 0; i < n_iv_cands (data); i++)
6115     {
6116       cand = iv_cand (data, i);
6117
6118       if (iv_ca_cand_used_p (ivs, cand))
6119         continue;
6120
6121       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6122       if (!act_delta)
6123         continue;
6124
6125       /* If we successfully added the candidate and the set is small enough,
6126          try optimizing it by removing other candidates.  */
6127       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6128         {
6129           iv_ca_delta_commit (data, ivs, act_delta, true);
6130           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6131           iv_ca_delta_commit (data, ivs, act_delta, false);
6132           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6133         }
6134
6135       if (compare_costs (acost, best_cost) < 0)
6136         {
6137           best_cost = acost;
6138           iv_ca_delta_free (&best_delta);
6139           best_delta = act_delta;
6140         }
6141       else
6142         iv_ca_delta_free (&act_delta);
6143     }
6144
6145   if (!best_delta)
6146     {
6147       /* Try removing the candidates from the set instead.  */
6148       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6149
6150       if (!best_delta && *try_replace_p)
6151         {
6152           *try_replace_p = false;
6153           /* So far candidate selecting algorithm tends to choose fewer IVs
6154              so that it can handle cases in which loops have many variables
6155              but the best choice is often to use only one general biv.  One
6156              weakness is it can't handle opposite cases, in which different
6157              candidates should be chosen with respect to each use.  To solve
6158              the problem, we replace candidates in a manner described by the
6159              comments of iv_ca_replace, thus give general algorithm a chance
6160              to break local optimal fixed-point in these cases.  */
6161           best_cost = iv_ca_replace (data, ivs, &best_delta);
6162         }
6163
6164       if (!best_delta)
6165         return false;
6166     }
6167
6168   iv_ca_delta_commit (data, ivs, best_delta, true);
6169   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6170   iv_ca_delta_free (&best_delta);
6171   return true;
6172 }
6173
6174 /* Attempts to find the optimal set of induction variables.  We do simple
6175    greedy heuristic -- we try to replace at most one candidate in the selected
6176    solution and remove the unused ivs while this improves the cost.  */
6177
6178 static struct iv_ca *
6179 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6180 {
6181   struct iv_ca *set;
6182   bool try_replace_p = true;
6183
6184   /* Get the initial solution.  */
6185   set = get_initial_solution (data, originalp);
6186   if (!set)
6187     {
6188       if (dump_file && (dump_flags & TDF_DETAILS))
6189         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6190       return NULL;
6191     }
6192
6193   if (dump_file && (dump_flags & TDF_DETAILS))
6194     {
6195       fprintf (dump_file, "Initial set of candidates:\n");
6196       iv_ca_dump (data, dump_file, set);
6197     }
6198
6199   while (try_improve_iv_set (data, set, &try_replace_p))
6200     {
6201       if (dump_file && (dump_flags & TDF_DETAILS))
6202         {
6203           fprintf (dump_file, "Improved to:\n");
6204           iv_ca_dump (data, dump_file, set);
6205         }
6206     }
6207
6208   return set;
6209 }
6210
6211 static struct iv_ca *
6212 find_optimal_iv_set (struct ivopts_data *data)
6213 {
6214   unsigned i;
6215   struct iv_ca *set, *origset;
6216   struct iv_use *use;
6217   comp_cost cost, origcost;
6218
6219   /* Determine the cost based on a strategy that starts with original IVs,
6220      and try again using a strategy that prefers candidates not based
6221      on any IVs.  */
6222   origset = find_optimal_iv_set_1 (data, true);
6223   set = find_optimal_iv_set_1 (data, false);
6224
6225   if (!origset && !set)
6226     return NULL;
6227
6228   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6229   cost = set ? iv_ca_cost (set) : infinite_cost;
6230
6231   if (dump_file && (dump_flags & TDF_DETAILS))
6232     {
6233       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6234                origcost.cost, origcost.complexity);
6235       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6236                cost.cost, cost.complexity);
6237     }
6238
6239   /* Choose the one with the best cost.  */
6240   if (compare_costs (origcost, cost) <= 0)
6241     {
6242       if (set)
6243         iv_ca_free (&set);
6244       set = origset;
6245     }
6246   else if (origset)
6247     iv_ca_free (&origset);
6248
6249   for (i = 0; i < n_iv_uses (data); i++)
6250     {
6251       use = iv_use (data, i);
6252       use->selected = iv_ca_cand_for_use (set, use)->cand;
6253     }
6254
6255   return set;
6256 }
6257
6258 /* Creates a new induction variable corresponding to CAND.  */
6259
6260 static void
6261 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6262 {
6263   gimple_stmt_iterator incr_pos;
6264   tree base;
6265   bool after = false;
6266
6267   if (!cand->iv)
6268     return;
6269
6270   switch (cand->pos)
6271     {
6272     case IP_NORMAL:
6273       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6274       break;
6275
6276     case IP_END:
6277       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6278       after = true;
6279       break;
6280
6281     case IP_AFTER_USE:
6282       after = true;
6283       /* fall through */
6284     case IP_BEFORE_USE:
6285       incr_pos = gsi_for_stmt (cand->incremented_at);
6286       break;
6287
6288     case IP_ORIGINAL:
6289       /* Mark that the iv is preserved.  */
6290       name_info (data, cand->var_before)->preserve_biv = true;
6291       name_info (data, cand->var_after)->preserve_biv = true;
6292
6293       /* Rewrite the increment so that it uses var_before directly.  */
6294       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6295       return;
6296     }
6297
6298   gimple_add_tmp_var (cand->var_before);
6299
6300   base = unshare_expr (cand->iv->base);
6301
6302   create_iv (base, unshare_expr (cand->iv->step),
6303              cand->var_before, data->current_loop,
6304              &incr_pos, after, &cand->var_before, &cand->var_after);
6305 }
6306
6307 /* Creates new induction variables described in SET.  */
6308
6309 static void
6310 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6311 {
6312   unsigned i;
6313   struct iv_cand *cand;
6314   bitmap_iterator bi;
6315
6316   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6317     {
6318       cand = iv_cand (data, i);
6319       create_new_iv (data, cand);
6320     }
6321
6322   if (dump_file && (dump_flags & TDF_DETAILS))
6323     {
6324       fprintf (dump_file, "\nSelected IV set: \n");
6325       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6326         {
6327           cand = iv_cand (data, i);
6328           dump_cand (dump_file, cand);
6329         }
6330       fprintf (dump_file, "\n");
6331     }
6332 }
6333
6334 /* Rewrites USE (definition of iv used in a nonlinear expression)
6335    using candidate CAND.  */
6336
6337 static void
6338 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6339                             struct iv_use *use, struct iv_cand *cand)
6340 {
6341   tree comp;
6342   tree op, tgt;
6343   gassign *ass;
6344   gimple_stmt_iterator bsi;
6345
6346   /* An important special case -- if we are asked to express value of
6347      the original iv by itself, just exit; there is no need to
6348      introduce a new computation (that might also need casting the
6349      variable to unsigned and back).  */
6350   if (cand->pos == IP_ORIGINAL
6351       && cand->incremented_at == use->stmt)
6352     {
6353       enum tree_code stmt_code;
6354
6355       gcc_assert (is_gimple_assign (use->stmt));
6356       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6357
6358       /* Check whether we may leave the computation unchanged.
6359          This is the case only if it does not rely on other
6360          computations in the loop -- otherwise, the computation
6361          we rely upon may be removed in remove_unused_ivs,
6362          thus leading to ICE.  */
6363       stmt_code = gimple_assign_rhs_code (use->stmt);
6364       if (stmt_code == PLUS_EXPR
6365           || stmt_code == MINUS_EXPR
6366           || stmt_code == POINTER_PLUS_EXPR)
6367         {
6368           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6369             op = gimple_assign_rhs2 (use->stmt);
6370           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6371             op = gimple_assign_rhs1 (use->stmt);
6372           else
6373             op = NULL_TREE;
6374         }
6375       else
6376         op = NULL_TREE;
6377
6378       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6379         return;
6380     }
6381
6382   comp = get_computation (data->current_loop, use, cand);
6383   gcc_assert (comp != NULL_TREE);
6384
6385   switch (gimple_code (use->stmt))
6386     {
6387     case GIMPLE_PHI:
6388       tgt = PHI_RESULT (use->stmt);
6389
6390       /* If we should keep the biv, do not replace it.  */
6391       if (name_info (data, tgt)->preserve_biv)
6392         return;
6393
6394       bsi = gsi_after_labels (gimple_bb (use->stmt));
6395       break;
6396
6397     case GIMPLE_ASSIGN:
6398       tgt = gimple_assign_lhs (use->stmt);
6399       bsi = gsi_for_stmt (use->stmt);
6400       break;
6401
6402     default:
6403       gcc_unreachable ();
6404     }
6405
6406   if (!valid_gimple_rhs_p (comp)
6407       || (gimple_code (use->stmt) != GIMPLE_PHI
6408           /* We can't allow re-allocating the stmt as it might be pointed
6409              to still.  */
6410           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6411               >= gimple_num_ops (gsi_stmt (bsi)))))
6412     {
6413       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6414                                        true, GSI_SAME_STMT);
6415       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6416         {
6417           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6418           /* As this isn't a plain copy we have to reset alignment
6419              information.  */
6420           if (SSA_NAME_PTR_INFO (comp))
6421             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6422         }
6423     }
6424
6425   if (gimple_code (use->stmt) == GIMPLE_PHI)
6426     {
6427       ass = gimple_build_assign (tgt, comp);
6428       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6429
6430       bsi = gsi_for_stmt (use->stmt);
6431       remove_phi_node (&bsi, false);
6432     }
6433   else
6434     {
6435       gimple_assign_set_rhs_from_tree (&bsi, comp);
6436       use->stmt = gsi_stmt (bsi);
6437     }
6438 }
6439
6440 /* Performs a peephole optimization to reorder the iv update statement with
6441    a mem ref to enable instruction combining in later phases. The mem ref uses
6442    the iv value before the update, so the reordering transformation requires
6443    adjustment of the offset. CAND is the selected IV_CAND.
6444
6445    Example:
6446
6447    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6448    iv2 = iv1 + 1;
6449
6450    if (t < val)      (1)
6451      goto L;
6452    goto Head;
6453
6454
6455    directly propagating t over to (1) will introduce overlapping live range
6456    thus increase register pressure. This peephole transform it into:
6457
6458
6459    iv2 = iv1 + 1;
6460    t = MEM_REF (base, iv2, 8, 8);
6461    if (t < val)
6462      goto L;
6463    goto Head;
6464 */
6465
6466 static void
6467 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6468 {
6469   tree var_after;
6470   gimple iv_update, stmt;
6471   basic_block bb;
6472   gimple_stmt_iterator gsi, gsi_iv;
6473
6474   if (cand->pos != IP_NORMAL)
6475     return;
6476
6477   var_after = cand->var_after;
6478   iv_update = SSA_NAME_DEF_STMT (var_after);
6479
6480   bb = gimple_bb (iv_update);
6481   gsi = gsi_last_nondebug_bb (bb);
6482   stmt = gsi_stmt (gsi);
6483
6484   /* Only handle conditional statement for now.  */
6485   if (gimple_code (stmt) != GIMPLE_COND)
6486     return;
6487
6488   gsi_prev_nondebug (&gsi);
6489   stmt = gsi_stmt (gsi);
6490   if (stmt != iv_update)
6491     return;
6492
6493   gsi_prev_nondebug (&gsi);
6494   if (gsi_end_p (gsi))
6495     return;
6496
6497   stmt = gsi_stmt (gsi);
6498   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6499     return;
6500
6501   if (stmt != use->stmt)
6502     return;
6503
6504   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6505     return;
6506
6507   if (dump_file && (dump_flags & TDF_DETAILS))
6508     {
6509       fprintf (dump_file, "Reordering \n");
6510       print_gimple_stmt (dump_file, iv_update, 0, 0);
6511       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6512       fprintf (dump_file, "\n");
6513     }
6514
6515   gsi = gsi_for_stmt (use->stmt);
6516   gsi_iv = gsi_for_stmt (iv_update);
6517   gsi_move_before (&gsi_iv, &gsi);
6518
6519   cand->pos = IP_BEFORE_USE;
6520   cand->incremented_at = use->stmt;
6521 }
6522
6523 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6524
6525 static void
6526 rewrite_use_address (struct ivopts_data *data,
6527                      struct iv_use *use, struct iv_cand *cand)
6528 {
6529   aff_tree aff;
6530   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6531   tree base_hint = NULL_TREE;
6532   tree ref, iv;
6533   bool ok;
6534
6535   adjust_iv_update_pos (cand, use);
6536   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6537   gcc_assert (ok);
6538   unshare_aff_combination (&aff);
6539
6540   /* To avoid undefined overflow problems, all IV candidates use unsigned
6541      integer types.  The drawback is that this makes it impossible for
6542      create_mem_ref to distinguish an IV that is based on a memory object
6543      from one that represents simply an offset.
6544
6545      To work around this problem, we pass a hint to create_mem_ref that
6546      indicates which variable (if any) in aff is an IV based on a memory
6547      object.  Note that we only consider the candidate.  If this is not
6548      based on an object, the base of the reference is in some subexpression
6549      of the use -- but these will use pointer types, so they are recognized
6550      by the create_mem_ref heuristics anyway.  */
6551   if (cand->iv->base_object)
6552     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6553
6554   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6555   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6556                         reference_alias_ptr_type (*use->op_p),
6557                         iv, base_hint, data->speed);
6558   copy_ref_info (ref, *use->op_p);
6559   *use->op_p = ref;
6560 }
6561
6562 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6563    candidate CAND.  */
6564
6565 static void
6566 rewrite_use_compare (struct ivopts_data *data,
6567                      struct iv_use *use, struct iv_cand *cand)
6568 {
6569   tree comp, *var_p, op, bound;
6570   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6571   enum tree_code compare;
6572   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6573   bool ok;
6574
6575   bound = cp->value;
6576   if (bound)
6577     {
6578       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6579       tree var_type = TREE_TYPE (var);
6580       gimple_seq stmts;
6581
6582       if (dump_file && (dump_flags & TDF_DETAILS))
6583         {
6584           fprintf (dump_file, "Replacing exit test: ");
6585           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6586         }
6587       compare = cp->comp;
6588       bound = unshare_expr (fold_convert (var_type, bound));
6589       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6590       if (stmts)
6591         gsi_insert_seq_on_edge_immediate (
6592                 loop_preheader_edge (data->current_loop),
6593                 stmts);
6594
6595       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6596       gimple_cond_set_lhs (cond_stmt, var);
6597       gimple_cond_set_code (cond_stmt, compare);
6598       gimple_cond_set_rhs (cond_stmt, op);
6599       return;
6600     }
6601
6602   /* The induction variable elimination failed; just express the original
6603      giv.  */
6604   comp = get_computation (data->current_loop, use, cand);
6605   gcc_assert (comp != NULL_TREE);
6606
6607   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6608   gcc_assert (ok);
6609
6610   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6611                                      true, GSI_SAME_STMT);
6612 }
6613
6614 /* Rewrites USE using candidate CAND.  */
6615
6616 static void
6617 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6618 {
6619   switch (use->type)
6620     {
6621       case USE_NONLINEAR_EXPR:
6622         rewrite_use_nonlinear_expr (data, use, cand);
6623         break;
6624
6625       case USE_ADDRESS:
6626         rewrite_use_address (data, use, cand);
6627         break;
6628
6629       case USE_COMPARE:
6630         rewrite_use_compare (data, use, cand);
6631         break;
6632
6633       default:
6634         gcc_unreachable ();
6635     }
6636
6637   update_stmt (use->stmt);
6638 }
6639
6640 /* Rewrite the uses using the selected induction variables.  */
6641
6642 static void
6643 rewrite_uses (struct ivopts_data *data)
6644 {
6645   unsigned i;
6646   struct iv_cand *cand;
6647   struct iv_use *use;
6648
6649   for (i = 0; i < n_iv_uses (data); i++)
6650     {
6651       use = iv_use (data, i);
6652       cand = use->selected;
6653       gcc_assert (cand);
6654
6655       rewrite_use (data, use, cand);
6656     }
6657 }
6658
6659 /* Removes the ivs that are not used after rewriting.  */
6660
6661 static void
6662 remove_unused_ivs (struct ivopts_data *data)
6663 {
6664   unsigned j;
6665   bitmap_iterator bi;
6666   bitmap toremove = BITMAP_ALLOC (NULL);
6667
6668   /* Figure out an order in which to release SSA DEFs so that we don't
6669      release something that we'd have to propagate into a debug stmt
6670      afterwards.  */
6671   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6672     {
6673       struct version_info *info;
6674
6675       info = ver_info (data, j);
6676       if (info->iv
6677           && !integer_zerop (info->iv->step)
6678           && !info->inv_id
6679           && !info->iv->have_use_for
6680           && !info->preserve_biv)
6681         {
6682           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6683
6684           tree def = info->iv->ssa_name;
6685
6686           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6687             {
6688               imm_use_iterator imm_iter;
6689               use_operand_p use_p;
6690               gimple stmt;
6691               int count = 0;
6692
6693               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6694                 {
6695                   if (!gimple_debug_bind_p (stmt))
6696                     continue;
6697
6698                   /* We just want to determine whether to do nothing
6699                      (count == 0), to substitute the computed
6700                      expression into a single use of the SSA DEF by
6701                      itself (count == 1), or to use a debug temp
6702                      because the SSA DEF is used multiple times or as
6703                      part of a larger expression (count > 1). */
6704                   count++;
6705                   if (gimple_debug_bind_get_value (stmt) != def)
6706                     count++;
6707
6708                   if (count > 1)
6709                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6710                 }
6711
6712               if (!count)
6713                 continue;
6714
6715               struct iv_use dummy_use;
6716               struct iv_cand *best_cand = NULL, *cand;
6717               unsigned i, best_pref = 0, cand_pref;
6718
6719               memset (&dummy_use, 0, sizeof (dummy_use));
6720               dummy_use.iv = info->iv;
6721               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6722                 {
6723                   cand = iv_use (data, i)->selected;
6724                   if (cand == best_cand)
6725                     continue;
6726                   cand_pref = operand_equal_p (cand->iv->step,
6727                                                info->iv->step, 0)
6728                     ? 4 : 0;
6729                   cand_pref
6730                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6731                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6732                     ? 2 : 0;
6733                   cand_pref
6734                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6735                     ? 1 : 0;
6736                   if (best_cand == NULL || best_pref < cand_pref)
6737                     {
6738                       best_cand = cand;
6739                       best_pref = cand_pref;
6740                     }
6741                 }
6742
6743               if (!best_cand)
6744                 continue;
6745
6746               tree comp = get_computation_at (data->current_loop,
6747                                               &dummy_use, best_cand,
6748                                               SSA_NAME_DEF_STMT (def));
6749               if (!comp)
6750                 continue;
6751
6752               if (count > 1)
6753                 {
6754                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6755                   DECL_ARTIFICIAL (vexpr) = 1;
6756                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6757                   if (SSA_NAME_VAR (def))
6758                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6759                   else
6760                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6761                   gdebug *def_temp
6762                     = gimple_build_debug_bind (vexpr, comp, NULL);
6763                   gimple_stmt_iterator gsi;
6764
6765                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6766                     gsi = gsi_after_labels (gimple_bb
6767                                             (SSA_NAME_DEF_STMT (def)));
6768                   else
6769                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6770
6771                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6772                   comp = vexpr;
6773                 }
6774
6775               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6776                 {
6777                   if (!gimple_debug_bind_p (stmt))
6778                     continue;
6779
6780                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6781                     SET_USE (use_p, comp);
6782
6783                   update_stmt (stmt);
6784                 }
6785             }
6786         }
6787     }
6788
6789   release_defs_bitset (toremove);
6790
6791   BITMAP_FREE (toremove);
6792 }
6793
6794 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6795    for hash_map::traverse.  */
6796
6797 bool
6798 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6799 {
6800   free (value);
6801   return true;
6802 }
6803
6804 /* Frees data allocated by the optimization of a single loop.  */
6805
6806 static void
6807 free_loop_data (struct ivopts_data *data)
6808 {
6809   unsigned i, j;
6810   bitmap_iterator bi;
6811   tree obj;
6812
6813   if (data->niters)
6814     {
6815       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6816       delete data->niters;
6817       data->niters = NULL;
6818     }
6819
6820   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6821     {
6822       struct version_info *info;
6823
6824       info = ver_info (data, i);
6825       free (info->iv);
6826       info->iv = NULL;
6827       info->has_nonlin_use = false;
6828       info->preserve_biv = false;
6829       info->inv_id = 0;
6830     }
6831   bitmap_clear (data->relevant);
6832   bitmap_clear (data->important_candidates);
6833
6834   for (i = 0; i < n_iv_uses (data); i++)
6835     {
6836       struct iv_use *use = iv_use (data, i);
6837
6838       free (use->iv);
6839       BITMAP_FREE (use->related_cands);
6840       for (j = 0; j < use->n_map_members; j++)
6841         if (use->cost_map[j].depends_on)
6842           BITMAP_FREE (use->cost_map[j].depends_on);
6843       free (use->cost_map);
6844       free (use);
6845     }
6846   data->iv_uses.truncate (0);
6847
6848   for (i = 0; i < n_iv_cands (data); i++)
6849     {
6850       struct iv_cand *cand = iv_cand (data, i);
6851
6852       free (cand->iv);
6853       if (cand->depends_on)
6854         BITMAP_FREE (cand->depends_on);
6855       free (cand);
6856     }
6857   data->iv_candidates.truncate (0);
6858
6859   if (data->version_info_size < num_ssa_names)
6860     {
6861       data->version_info_size = 2 * num_ssa_names;
6862       free (data->version_info);
6863       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6864     }
6865
6866   data->max_inv_id = 0;
6867
6868   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6869     SET_DECL_RTL (obj, NULL_RTX);
6870
6871   decl_rtl_to_reset.truncate (0);
6872
6873   data->inv_expr_tab->empty ();
6874   data->inv_expr_id = 0;
6875 }
6876
6877 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6878    loop tree.  */
6879
6880 static void
6881 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6882 {
6883   free_loop_data (data);
6884   free (data->version_info);
6885   BITMAP_FREE (data->relevant);
6886   BITMAP_FREE (data->important_candidates);
6887
6888   decl_rtl_to_reset.release ();
6889   data->iv_uses.release ();
6890   data->iv_candidates.release ();
6891   delete data->inv_expr_tab;
6892   data->inv_expr_tab = NULL;
6893   free_affine_expand_cache (&data->name_expansion_cache);
6894 }
6895
6896 /* Returns true if the loop body BODY includes any function calls.  */
6897
6898 static bool
6899 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6900 {
6901   gimple_stmt_iterator gsi;
6902   unsigned i;
6903
6904   for (i = 0; i < num_nodes; i++)
6905     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6906       {
6907         gimple stmt = gsi_stmt (gsi);
6908         if (is_gimple_call (stmt)
6909             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6910           return true;
6911       }
6912   return false;
6913 }
6914
6915 /* Optimizes the LOOP.  Returns true if anything changed.  */
6916
6917 static bool
6918 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6919 {
6920   bool changed = false;
6921   struct iv_ca *iv_ca;
6922   edge exit = single_dom_exit (loop);
6923   basic_block *body;
6924
6925   gcc_assert (!data->niters);
6926   data->current_loop = loop;
6927   data->speed = optimize_loop_for_speed_p (loop);
6928
6929   if (dump_file && (dump_flags & TDF_DETAILS))
6930     {
6931       fprintf (dump_file, "Processing loop %d\n", loop->num);
6932
6933       if (exit)
6934         {
6935           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6936                    exit->src->index, exit->dest->index);
6937           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6938           fprintf (dump_file, "\n");
6939         }
6940
6941       fprintf (dump_file, "\n");
6942     }
6943
6944   body = get_loop_body (loop);
6945   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6946   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6947   free (body);
6948
6949   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6950
6951   /* For each ssa name determines whether it behaves as an induction variable
6952      in some loop.  */
6953   if (!find_induction_variables (data))
6954     goto finish;
6955
6956   /* Finds interesting uses (item 1).  */
6957   find_interesting_uses (data);
6958   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6959     goto finish;
6960
6961   /* Finds candidates for the induction variables (item 2).  */
6962   find_iv_candidates (data);
6963
6964   /* Calculates the costs (item 3, part 1).  */
6965   determine_iv_costs (data);
6966   determine_use_iv_costs (data);
6967   determine_set_costs (data);
6968
6969   /* Find the optimal set of induction variables (item 3, part 2).  */
6970   iv_ca = find_optimal_iv_set (data);
6971   if (!iv_ca)
6972     goto finish;
6973   changed = true;
6974
6975   /* Create the new induction variables (item 4, part 1).  */
6976   create_new_ivs (data, iv_ca);
6977   iv_ca_free (&iv_ca);
6978
6979   /* Rewrite the uses (item 4, part 2).  */
6980   rewrite_uses (data);
6981
6982   /* Remove the ivs that are unused after rewriting.  */
6983   remove_unused_ivs (data);
6984
6985   /* We have changed the structure of induction variables; it might happen
6986      that definitions in the scev database refer to some of them that were
6987      eliminated.  */
6988   scev_reset ();
6989
6990 finish:
6991   free_loop_data (data);
6992
6993   return changed;
6994 }
6995
6996 /* Main entry point.  Optimizes induction variables in loops.  */
6997
6998 void
6999 tree_ssa_iv_optimize (void)
7000 {
7001   struct loop *loop;
7002   struct ivopts_data data;
7003
7004   tree_ssa_iv_optimize_init (&data);
7005
7006   /* Optimize the loops starting with the innermost ones.  */
7007   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7008     {
7009       if (dump_file && (dump_flags & TDF_DETAILS))
7010         flow_loop_dump (loop, dump_file, NULL, 1);
7011
7012       tree_ssa_iv_optimize_loop (&data, loop);
7013     }
7014
7015   tree_ssa_iv_optimize_finalize (&data);
7016 }