/* gcc/tree-ssa-loop-ivopts.c */
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
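/* As an illustrative sketch (not verbatim output of this pass; the actual
   result depends on target addressing modes and costs), strength reduction
   can rewrite an indexed loop into a pointer-based one:

     before IVOPTS:                 one possible outcome after IVOPTS:

       for (i = 0; i < n; i++)        p = &a[0]; end = &a[n];
         a[i] = 0;                    for (; p != end; p++)
                                        *p = 0;

   Here the index iv `i' is eliminated in favor of the pointer iv `p',
   and the exit test is rewritten to compare against a precomputed bound.  */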
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "hash-set.h"
69 #include "machmode.h"
70 #include "vec.h"
71 #include "double-int.h"
72 #include "input.h"
73 #include "alias.h"
74 #include "symtab.h"
75 #include "wide-int.h"
76 #include "inchash.h"
77 #include "tree.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
80 #include "tm_p.h"
81 #include "predict.h"
82 #include "hard-reg-set.h"
83 #include "function.h"
84 #include "dominance.h"
85 #include "cfg.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
88 #include "hash-map.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
92 #include "tree-eh.h"
93 #include "gimple-expr.h"
94 #include "is-a.h"
95 #include "gimple.h"
96 #include "gimplify.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
101 #include "ipa-ref.h"
102 #include "cgraph.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
112 #include "hashtab.h"
113 #include "rtl.h"
114 #include "flags.h"
115 #include "statistics.h"
116 #include "real.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
119 #include "expmed.h"
120 #include "dojump.h"
121 #include "explow.h"
122 #include "calls.h"
123 #include "emit-rtl.h"
124 #include "varasm.h"
125 #include "stmt.h"
126 #include "expr.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
129 #include "cfgloop.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
133 #include "params.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
136 #include "target.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
141 #include "tree-vectorizer.h"
143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
144 cost of different addressing modes. This should be moved to a TBD
145 interface between the GIMPLE and RTL worlds. */
146 #include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
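/* For instance (an illustrative sketch, not an exhaustive list of cases):

     determine_base_object (&a[i])    returns a pointer to `a'
     determine_base_object (p_1 + 4)  recurses on p_1 (POINTER_PLUS_EXPR)
     determine_base_object (cst)      returns NULL_TREE (no object)

   so two address ivs walking the same array share one base object, which
   later lets the pass reason about them together.  */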
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
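/* Illustrative examples:

     contain_complex_addr_expr (&a)    -> false (operand is a DECL)
     contain_complex_addr_expr (&a[0]) -> true  (operand is an ARRAY_REF)
     contain_complex_addr_expr (&s.f)  -> true  (operand is a COMPONENT_REF)

   alloc_iv below uses this to decide whether the base needs to be lowered
   through the affine combination machinery.  */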
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
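/* For a canonical counted loop the biv looks like (illustrative gimple):

     <bb header>:
       i_1 = PHI <0(preheader), i_2(latch)>
       ...
       i_2 = i_1 + 4;

   determine_biv_step returns 4 here, and NULL_TREE for e.g. a phi whose
   value does not evolve linearly (or does not evolve at all).  */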
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by the iv
         step.  Ideally we should stop at any ssa var, but since that is
         expensive and unusual to happen, we just do it on the first one.

         See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by the iv
     step.  Ideally we should stop at any ssa var, but since that is
     expensive and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
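/* E.g. for the exit test (illustrative):

     if (i_1 < n_2)   where i_1 is an iv with nonzero step and n_2 is
                      loop invariant,

   the function returns true with *CONTROL_VAR pointing at i_1 and *BOUND
   at n_2; for `n_2 < i_1' the operands are swapped first, and for a
   compare of two ivs (or two invariants) it returns false.  */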
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
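/* For example (illustrative): with nested loops L1 containing L2, the
   expression n_1 + m_2 where n_1 is defined before L1 and m_2 inside L1
   but outside L2 is invariant in L2 at best, so the function called with
   LOOP == L2 returns L2; called on an expression using an ssa name
   defined inside L2 it returns NULL.  */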
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
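/* As a worked example (illustrative): for a use a[i] where a has 4-byte
   elements and i is an iv with step 1, the element size (4) is multiplied
   by the converted iv step (1), so 4 is accumulated into DTA->step and
   *IDX is replaced by the initial value of i.  For a nested reference like
   a[i][j] the callback runs once per index, summing the byte steps.  */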
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
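/* Illustrative cases:

     constant_multiple_of (i_1 * 8, i_1, &m)  -> true,  m = 8
     constant_multiple_of (12, 4, &m)         -> true,  m = 3
     constant_multiple_of (13, 4, &m)         -> false (nonzero remainder)  */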
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
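/* Worked example (illustrative): for a 4-byte int reference (ALIGN == 32
   bits) and STEP == 2, tree_ctz (step) == 1, so (1U << 1) * BITS_PER_UNIT
   == 16 < 32 and the reference may become unaligned after some iterations;
   with STEP == 4 that check passes, since 32 is not less than 32.  */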
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
        return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
        return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
        return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2149 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2150 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2151 we are at the top-level of the processed address. */
2153 static tree
2154 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2155 HOST_WIDE_INT *offset)
2157 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2158 enum tree_code code;
2159 tree type, orig_type = TREE_TYPE (expr);
2160 HOST_WIDE_INT off0, off1, st;
2161 tree orig_expr = expr;
2163 STRIP_NOPS (expr);
2165 type = TREE_TYPE (expr);
2166 code = TREE_CODE (expr);
2167 *offset = 0;
2169 switch (code)
2171 case INTEGER_CST:
2172 if (!cst_and_fits_in_hwi (expr)
2173 || integer_zerop (expr))
2174 return orig_expr;
2176 *offset = int_cst_value (expr);
2177 return build_int_cst (orig_type, 0);
2179 case POINTER_PLUS_EXPR:
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 op0 = TREE_OPERAND (expr, 0);
2183 op1 = TREE_OPERAND (expr, 1);
2185 op0 = strip_offset_1 (op0, false, false, &off0);
2186 op1 = strip_offset_1 (op1, false, false, &off1);
2188 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2189 if (op0 == TREE_OPERAND (expr, 0)
2190 && op1 == TREE_OPERAND (expr, 1))
2191 return orig_expr;
2193 if (integer_zerop (op1))
2194 expr = op0;
2195 else if (integer_zerop (op0))
2197 if (code == MINUS_EXPR)
2198 expr = fold_build1 (NEGATE_EXPR, type, op1);
2199 else
2200 expr = op1;
2202 else
2203 expr = fold_build2 (code, type, op0, op1);
2205 return fold_convert (orig_type, expr);
2207 case MULT_EXPR:
2208 op1 = TREE_OPERAND (expr, 1);
2209 if (!cst_and_fits_in_hwi (op1))
2210 return orig_expr;
2212 op0 = TREE_OPERAND (expr, 0);
2213 op0 = strip_offset_1 (op0, false, false, &off0);
2214 if (op0 == TREE_OPERAND (expr, 0))
2215 return orig_expr;
2217 *offset = off0 * int_cst_value (op1);
2218 if (integer_zerop (op0))
2219 expr = op0;
2220 else
2221 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2223 return fold_convert (orig_type, expr);
2225 case ARRAY_REF:
2226 case ARRAY_RANGE_REF:
2227 if (!inside_addr)
2228 return orig_expr;
2230 step = array_ref_element_size (expr);
2231 if (!cst_and_fits_in_hwi (step))
2232 break;
2234 st = int_cst_value (step);
2235 op1 = TREE_OPERAND (expr, 1);
2236 op1 = strip_offset_1 (op1, false, false, &off1);
2237 *offset = off1 * st;
2239 if (top_compref
2240 && integer_zerop (op1))
2242 /* Strip the component reference completely. */
2243 op0 = TREE_OPERAND (expr, 0);
2244 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2245 *offset += off0;
2246 return op0;
2248 break;
2250 case COMPONENT_REF:
2252 tree field;
2254 if (!inside_addr)
2255 return orig_expr;
2257 tmp = component_ref_field_offset (expr);
2258 field = TREE_OPERAND (expr, 1);
2259 if (top_compref
2260 && cst_and_fits_in_hwi (tmp)
2261 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2263 HOST_WIDE_INT boffset, abs_off;
2265 /* Strip the component reference completely. */
2266 op0 = TREE_OPERAND (expr, 0);
2267 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2268 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2269 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2270 if (boffset < 0)
2271 abs_off = -abs_off;
2273 *offset = off0 + int_cst_value (tmp) + abs_off;
2274 return op0;
2277 break;
2279 case ADDR_EXPR:
2280 op0 = TREE_OPERAND (expr, 0);
2281 op0 = strip_offset_1 (op0, true, true, &off0);
2282 *offset += off0;
2284 if (op0 == TREE_OPERAND (expr, 0))
2285 return orig_expr;
2287 expr = build_fold_addr_expr (op0);
2288 return fold_convert (orig_type, expr);
2290 case MEM_REF:
2291 /* ??? Offset operand? */
2292 inside_addr = false;
2293 break;
2295 default:
2296 return orig_expr;
2299 /* Default handling of expressions for which we want to recurse into
2300 the first operand. */
2301 op0 = TREE_OPERAND (expr, 0);
2302 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2303 *offset += off0;
2305 if (op0 == TREE_OPERAND (expr, 0)
2306 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2307 return orig_expr;
2309 expr = copy_node (expr);
2310 TREE_OPERAND (expr, 0) = op0;
2311 if (op1)
2312 TREE_OPERAND (expr, 1) = op1;
2314 /* Inside an address, we might strip the top-level component references,
2315 thus changing the type of the expression. Handling of ADDR_EXPR
2316 will fix that. */
2317 expr = fold_convert (orig_type, expr);
2319 return expr;
2322 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2324 static tree
2325 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2327 HOST_WIDE_INT off;
2328 tree core = strip_offset_1 (expr, false, false, &off);
2329 *offset = off;
2330 return core;
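/* A worked example (editor's sketch, assuming a hypothetical
   "int a[10];" on a target with 4-byte int): strip_offset (&a[i + 3],
   &off) recurses through the ADDR_EXPR into the ARRAY_REF, splits the
   index into i + 3, multiplies the constant part by the element size,
   and returns &a[i] with off == 12.  For a fully constant index such
   as &a[3], the array reference is stripped completely and the result
   is (int *) &a, again with off == 12.  */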
2333 /* Returns a variant of TYPE that can be used as a base for different uses.
2334 We return an unsigned type with the same precision, which avoids problems
2335 with overflows. */
2337 static tree
2338 generic_type_for (tree type)
2340 if (POINTER_TYPE_P (type))
2341 return unsigned_type_for (type);
2343 if (TYPE_UNSIGNED (type))
2344 return type;
2346 return unsigned_type_for (type);
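/* For instance, generic_type_for (int) is unsigned int, and for a
   pointer type it is the unsigned integer type of the same precision,
   so candidate arithmetic never triggers the undefined behavior of
   signed or pointer overflow.  */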
2349 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2350 the bitmap in which we should store them. */
2352 static struct ivopts_data *fd_ivopts_data;
2353 static tree
2354 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2356 bitmap *depends_on = (bitmap *) data;
2357 struct version_info *info;
2359 if (TREE_CODE (*expr_p) != SSA_NAME)
2360 return NULL_TREE;
2361 info = name_info (fd_ivopts_data, *expr_p);
2363 if (!info->inv_id || info->has_nonlin_use)
2364 return NULL_TREE;
2366 if (!*depends_on)
2367 *depends_on = BITMAP_ALLOC (NULL);
2368 bitmap_set_bit (*depends_on, info->inv_id);
2370 return NULL_TREE;
2373 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2374 position to POS. If USE is not NULL, the candidate is set as related to
2375 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2376 replacement of the final value of the iv by a direct computation. */
2378 static struct iv_cand *
2379 add_candidate_1 (struct ivopts_data *data,
2380 tree base, tree step, bool important, enum iv_position pos,
2381 struct iv_use *use, gimple incremented_at)
2383 unsigned i;
2384 struct iv_cand *cand = NULL;
2385 tree type, orig_type;
2387 /* For non-original variables, make sure their values are computed in a type
2388 that does not invoke undefined behavior on overflows (since in general,
2389 we cannot prove that these induction variables are non-wrapping). */
2390 if (pos != IP_ORIGINAL)
2392 orig_type = TREE_TYPE (base);
2393 type = generic_type_for (orig_type);
2394 if (type != orig_type)
2396 base = fold_convert (type, base);
2397 step = fold_convert (type, step);
2401 for (i = 0; i < n_iv_cands (data); i++)
2403 cand = iv_cand (data, i);
2405 if (cand->pos != pos)
2406 continue;
2408 if (cand->incremented_at != incremented_at
2409 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2410 && cand->ainc_use != use))
2411 continue;
2413 if (!cand->iv)
2415 if (!base && !step)
2416 break;
2418 continue;
2421 if (!base && !step)
2422 continue;
2424 if (operand_equal_p (base, cand->iv->base, 0)
2425 && operand_equal_p (step, cand->iv->step, 0)
2426 && (TYPE_PRECISION (TREE_TYPE (base))
2427 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2428 break;
2431 if (i == n_iv_cands (data))
2433 cand = XCNEW (struct iv_cand);
2434 cand->id = i;
2436 if (!base && !step)
2437 cand->iv = NULL;
2438 else
2439 cand->iv = alloc_iv (base, step);
2441 cand->pos = pos;
2442 if (pos != IP_ORIGINAL && cand->iv)
2444 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2445 cand->var_after = cand->var_before;
2447 cand->important = important;
2448 cand->incremented_at = incremented_at;
2449 data->iv_candidates.safe_push (cand);
2451 if (step
2452 && TREE_CODE (step) != INTEGER_CST)
2454 fd_ivopts_data = data;
2455 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2458 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2459 cand->ainc_use = use;
2460 else
2461 cand->ainc_use = NULL;
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 dump_cand (dump_file, cand);
2467 if (important && !cand->important)
2469 cand->important = true;
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2474 if (use)
2476 bitmap_set_bit (use->related_cands, i);
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "Candidate %d is related to use %d\n",
2479 cand->id, use->id);
2482 return cand;
2485 /* Returns true if incrementing the induction variable at the end of the LOOP
2486 is allowed.
2488 The purpose is to avoid splitting the latch edge with a biv increment, thus
2489 creating a jump, possibly confusing other optimization passes and leaving
2490 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2491 is not available (so we do not have a better alternative), or if the latch
2492 edge is already nonempty. */
2494 static bool
2495 allow_ip_end_pos_p (struct loop *loop)
2497 if (!ip_normal_pos (loop))
2498 return true;
2500 if (!empty_block_p (ip_end_pos (loop)))
2501 return true;
2503 return false;
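/* For example, in a loop whose latch block contains only the
   back-edge jump, IP_NORMAL_POS is usually available and the latch is
   empty, so this returns false and the increment is not placed at
   IP_END, which would have forced splitting the latch edge.  */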
2506 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2507 Important field is set to IMPORTANT. */
2509 static void
2510 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2511 bool important, struct iv_use *use)
2513 basic_block use_bb = gimple_bb (use->stmt);
2514 machine_mode mem_mode;
2515 unsigned HOST_WIDE_INT cstepi;
2517 /* If we insert the increment in any position other than the standard
2518 ones, we must ensure that it is incremented once per iteration.
2519 It must not be in an inner nested loop, or one side of an if
2520 statement. */
2521 if (use_bb->loop_father != data->current_loop
2522 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2523 || stmt_could_throw_p (use->stmt)
2524 || !cst_and_fits_in_hwi (step))
2525 return;
2527 cstepi = int_cst_value (step);
2529 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2530 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2531 || USE_STORE_PRE_INCREMENT (mem_mode))
2532 && GET_MODE_SIZE (mem_mode) == cstepi)
2533 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2534 || USE_STORE_PRE_DECREMENT (mem_mode))
2535 && GET_MODE_SIZE (mem_mode) == -cstepi))
2537 enum tree_code code = MINUS_EXPR;
2538 tree new_base;
2539 tree new_step = step;
2541 if (POINTER_TYPE_P (TREE_TYPE (base)))
2543 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2544 code = POINTER_PLUS_EXPR;
2546 else
2547 new_step = fold_convert (TREE_TYPE (base), new_step);
2548 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2549 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2550 use->stmt);
2552 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2553 || USE_STORE_POST_INCREMENT (mem_mode))
2554 && GET_MODE_SIZE (mem_mode) == cstepi)
2555 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2556 || USE_STORE_POST_DECREMENT (mem_mode))
2557 && GET_MODE_SIZE (mem_mode) == -cstepi))
2559 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2560 use->stmt);
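/* Illustration (hedged, target-dependent): on a machine with
   post-increment addressing, e.g. an ARM load like "ldr r0, [r1], #4",
   a USE_ADDRESS use with SImode accesses and cstepi == 4 satisfies
   GET_MODE_SIZE (mem_mode) == cstepi, so an IP_AFTER_USE candidate is
   added whose increment can be folded into the memory access.  */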
2564 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2565 position to POS. If USE is not NULL, the candidate is set as related to
2566 it. The candidate computation is scheduled on all available positions. */
2568 static void
2569 add_candidate (struct ivopts_data *data,
2570 tree base, tree step, bool important, struct iv_use *use)
2572 if (ip_normal_pos (data->current_loop))
2573 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2574 if (ip_end_pos (data->current_loop)
2575 && allow_ip_end_pos_p (data->current_loop))
2576 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2578 if (use != NULL && use->type == USE_ADDRESS)
2579 add_autoinc_candidates (data, base, step, important, use);
2582 /* Adds standard iv candidates. */
2584 static void
2585 add_standard_iv_candidates (struct ivopts_data *data)
2587 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2589 /* The same for the `long' type if it is still fast enough. */
2590 if (TYPE_PRECISION
2591 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2592 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2593 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2594 build_int_cst (long_integer_type_node, 1), true, NULL);
2596 /* And likewise for `long long' if it is still fast enough. */
2597 if (TYPE_PRECISION
2598 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2599 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2600 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2601 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2605 /* Adds candidates based on the old induction variable IV. */
2607 static void
2608 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2610 gimple phi;
2611 tree def;
2612 struct iv_cand *cand;
2614 add_candidate (data, iv->base, iv->step, true, NULL);
2616 /* The same, but with initial value zero. */
2617 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2618 add_candidate (data, size_int (0), iv->step, true, NULL);
2619 else
2620 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2621 iv->step, true, NULL);
2623 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2624 if (gimple_code (phi) == GIMPLE_PHI)
2626 /* Additionally record the possibility of leaving the original iv
2627 untouched. */
2628 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2629 /* Don't add the candidate if it's from another PHI node, because
2630 then it's an affine iv appearing in the form of PEELED_CHREC. */
2631 phi = SSA_NAME_DEF_STMT (def);
2632 if (gimple_code (phi) != GIMPLE_PHI)
2634 cand = add_candidate_1 (data,
2635 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2636 SSA_NAME_DEF_STMT (def));
2637 cand->var_before = iv->ssa_name;
2638 cand->var_after = def;
2640 else
2641 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2645 /* Adds candidates based on the old induction variables. */
2647 static void
2648 add_old_ivs_candidates (struct ivopts_data *data)
2650 unsigned i;
2651 struct iv *iv;
2652 bitmap_iterator bi;
2654 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2656 iv = ver_info (data, i)->iv;
2657 if (iv && iv->biv_p && !integer_zerop (iv->step))
2658 add_old_iv_candidates (data, iv);
2662 /* Adds candidates based on the value of the induction variable IV and USE. */
2664 static void
2665 add_iv_value_candidates (struct ivopts_data *data,
2666 struct iv *iv, struct iv_use *use)
2668 unsigned HOST_WIDE_INT offset;
2669 tree base;
2670 tree basetype;
2672 add_candidate (data, iv->base, iv->step, false, use);
2674 /* The same, but with initial value zero. Make such a variable important,
2675 since it is generic enough that possibly many uses may be based
2676 on it. */
2677 basetype = TREE_TYPE (iv->base);
2678 if (POINTER_TYPE_P (basetype))
2679 basetype = sizetype;
2680 add_candidate (data, build_int_cst (basetype, 0),
2681 iv->step, true, use);
2683 /* Third, try removing the constant offset. Make sure to even
2684 add a candidate for &a[0] vs. (T *)&a. */
2685 base = strip_offset (iv->base, &offset);
2686 if (offset
2687 || base != iv->base)
2688 add_candidate (data, base, iv->step, false, use);
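/* Worked example (editor's illustration, 4-byte elements): for a use
   whose iv is {&a[3], +, 4} this adds up to three candidates:
   base &a[3] with step 4 (the use's own value), base 0 with step 4
   (important, shareable by many uses), and base &a[0] with step 4
   (the constant offset 12 stripped).  */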
2691 /* Adds candidates based on the uses. */
2693 static void
2694 add_derived_ivs_candidates (struct ivopts_data *data)
2696 unsigned i;
2698 for (i = 0; i < n_iv_uses (data); i++)
2700 struct iv_use *use = iv_use (data, i);
2702 if (!use)
2703 continue;
2705 switch (use->type)
2707 case USE_NONLINEAR_EXPR:
2708 case USE_COMPARE:
2709 case USE_ADDRESS:
2710 /* Just add the ivs based on the value of the iv used here. */
2711 add_iv_value_candidates (data, use->iv, use);
2712 break;
2714 default:
2715 gcc_unreachable ();
2720 /* Record important candidates and add them to related_cands bitmaps
2721 if needed. */
2723 static void
2724 record_important_candidates (struct ivopts_data *data)
2726 unsigned i;
2727 struct iv_use *use;
2729 for (i = 0; i < n_iv_cands (data); i++)
2731 struct iv_cand *cand = iv_cand (data, i);
2733 if (cand->important)
2734 bitmap_set_bit (data->important_candidates, i);
2737 data->consider_all_candidates = (n_iv_cands (data)
2738 <= CONSIDER_ALL_CANDIDATES_BOUND);
2740 if (data->consider_all_candidates)
2742 /* We will not need "related_cands" bitmaps in this case,
2743 so release them to decrease peak memory consumption. */
2744 for (i = 0; i < n_iv_uses (data); i++)
2746 use = iv_use (data, i);
2747 BITMAP_FREE (use->related_cands);
2750 else
2752 /* Add important candidates to the related_cands bitmaps. */
2753 for (i = 0; i < n_iv_uses (data); i++)
2754 bitmap_ior_into (iv_use (data, i)->related_cands,
2755 data->important_candidates);
2759 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2760 If consider_all_candidates is true, we use an array indexed by candidate id;
2761 otherwise we allocate a small hash table for every use. */
2763 static void
2764 alloc_use_cost_map (struct ivopts_data *data)
2766 unsigned i, size, s;
2768 for (i = 0; i < n_iv_uses (data); i++)
2770 struct iv_use *use = iv_use (data, i);
2772 if (data->consider_all_candidates)
2773 size = n_iv_cands (data);
2774 else
2776 s = bitmap_count_bits (use->related_cands);
2778 /* Round up to the nearest power of two, so that taking the modulo is fast. */
2779 size = s ? (1 << ceil_log2 (s)) : 1;
2782 use->n_map_members = size;
2783 use->cost_map = XCNEWVEC (struct cost_pair, size);
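/* For example, a use related to 5 candidates gets n_map_members == 8,
   the next power of two, so set_use_iv_cost and get_use_iv_cost below
   can reduce a candidate id with "id & 7" instead of a division.  */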
2787 /* Returns a description of the computation cost of an expression whose
2788 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2790 static comp_cost
2791 new_cost (unsigned runtime, unsigned complexity)
2793 comp_cost cost;
2795 cost.cost = runtime;
2796 cost.complexity = complexity;
2798 return cost;
2801 /* Adds costs COST1 and COST2. */
2803 static comp_cost
2804 add_costs (comp_cost cost1, comp_cost cost2)
2806 cost1.cost += cost2.cost;
2807 cost1.complexity += cost2.complexity;
2809 return cost1;
2811 /* Subtracts cost COST2 from COST1. */
2813 static comp_cost
2814 sub_costs (comp_cost cost1, comp_cost cost2)
2816 cost1.cost -= cost2.cost;
2817 cost1.complexity -= cost2.complexity;
2819 return cost1;
2822 /* Returns a negative number if COST1 < COST2, a positive number if
2823 COST1 > COST2, and 0 if COST1 = COST2. */
2825 static int
2826 compare_costs (comp_cost cost1, comp_cost cost2)
2828 if (cost1.cost == cost2.cost)
2829 return cost1.complexity - cost2.complexity;
2831 return cost1.cost - cost2.cost;
2834 /* Returns true if COST is infinite. */
2836 static bool
2837 infinite_cost_p (comp_cost cost)
2839 return cost.cost == INFTY;
2842 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2843 depends on the invariants DEPENDS_ON, that the value used in expressing it
2844 is VALUE, and, for iv elimination, that the comparison operator is COMP. */
2846 static void
2847 set_use_iv_cost (struct ivopts_data *data,
2848 struct iv_use *use, struct iv_cand *cand,
2849 comp_cost cost, bitmap depends_on, tree value,
2850 enum tree_code comp, int inv_expr_id)
2852 unsigned i, s;
2854 if (infinite_cost_p (cost))
2856 BITMAP_FREE (depends_on);
2857 return;
2860 if (data->consider_all_candidates)
2862 use->cost_map[cand->id].cand = cand;
2863 use->cost_map[cand->id].cost = cost;
2864 use->cost_map[cand->id].depends_on = depends_on;
2865 use->cost_map[cand->id].value = value;
2866 use->cost_map[cand->id].comp = comp;
2867 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2868 return;
2871 /* n_map_members is a power of two, so this computes modulo. */
2872 s = cand->id & (use->n_map_members - 1);
2873 for (i = s; i < use->n_map_members; i++)
2874 if (!use->cost_map[i].cand)
2875 goto found;
2876 for (i = 0; i < s; i++)
2877 if (!use->cost_map[i].cand)
2878 goto found;
2880 gcc_unreachable ();
2882 found:
2883 use->cost_map[i].cand = cand;
2884 use->cost_map[i].cost = cost;
2885 use->cost_map[i].depends_on = depends_on;
2886 use->cost_map[i].value = value;
2887 use->cost_map[i].comp = comp;
2888 use->cost_map[i].inv_expr_id = inv_expr_id;
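/* Sketch of the open addressing above: with n_map_members == 8,
   candidate id 11 starts at slot 11 & 7 == 3; if that slot is taken,
   slots 4, 5, 6, 7 are probed and then the search wraps to 0.  Since
   only finite-cost pairs for related candidates are stored, a free
   slot should always be found and gcc_unreachable () is never hit.  */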
2891 /* Gets cost of (USE, CANDIDATE) pair. */
2893 static struct cost_pair *
2894 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2895 struct iv_cand *cand)
2897 unsigned i, s;
2898 struct cost_pair *ret;
2900 if (!cand)
2901 return NULL;
2903 if (data->consider_all_candidates)
2905 ret = use->cost_map + cand->id;
2906 if (!ret->cand)
2907 return NULL;
2909 return ret;
2912 /* n_map_members is a power of two, so this computes modulo. */
2913 s = cand->id & (use->n_map_members - 1);
2914 for (i = s; i < use->n_map_members; i++)
2915 if (use->cost_map[i].cand == cand)
2916 return use->cost_map + i;
2917 else if (use->cost_map[i].cand == NULL)
2918 return NULL;
2919 for (i = 0; i < s; i++)
2920 if (use->cost_map[i].cand == cand)
2921 return use->cost_map + i;
2922 else if (use->cost_map[i].cand == NULL)
2923 return NULL;
2925 return NULL;
2928 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2929 static rtx
2930 produce_memory_decl_rtl (tree obj, int *regno)
2932 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2933 machine_mode address_mode = targetm.addr_space.address_mode (as);
2934 rtx x;
2936 gcc_assert (obj);
2937 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2939 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2940 x = gen_rtx_SYMBOL_REF (address_mode, name);
2941 SET_SYMBOL_REF_DECL (x, obj);
2942 x = gen_rtx_MEM (DECL_MODE (obj), x);
2943 set_mem_addr_space (x, as);
2944 targetm.encode_section_info (obj, x, true);
2946 else
2948 x = gen_raw_REG (address_mode, (*regno)++);
2949 x = gen_rtx_MEM (DECL_MODE (obj), x);
2950 set_mem_addr_space (x, as);
2953 return x;
2956 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2957 walk_tree. DATA contains the actual fake register number. */
2959 static tree
2960 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2962 tree obj = NULL_TREE;
2963 rtx x = NULL_RTX;
2964 int *regno = (int *) data;
2966 switch (TREE_CODE (*expr_p))
2968 case ADDR_EXPR:
2969 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2970 handled_component_p (*expr_p);
2971 expr_p = &TREE_OPERAND (*expr_p, 0))
2972 continue;
2973 obj = *expr_p;
2974 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2975 x = produce_memory_decl_rtl (obj, regno);
2976 break;
2978 case SSA_NAME:
2979 *ws = 0;
2980 obj = SSA_NAME_VAR (*expr_p);
2981 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2982 if (!obj)
2983 return NULL_TREE;
2984 if (!DECL_RTL_SET_P (obj))
2985 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2986 break;
2988 case VAR_DECL:
2989 case PARM_DECL:
2990 case RESULT_DECL:
2991 *ws = 0;
2992 obj = *expr_p;
2994 if (DECL_RTL_SET_P (obj))
2995 break;
2997 if (DECL_MODE (obj) == BLKmode)
2998 x = produce_memory_decl_rtl (obj, regno);
2999 else
3000 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3002 break;
3004 default:
3005 break;
3008 if (x)
3010 decl_rtl_to_reset.safe_push (obj);
3011 SET_DECL_RTL (obj, x);
3014 return NULL_TREE;
3017 /* Determines cost of the computation of EXPR. */
3019 static unsigned
3020 computation_cost (tree expr, bool speed)
3022 rtx_insn *seq;
3023 rtx rslt;
3024 tree type = TREE_TYPE (expr);
3025 unsigned cost;
3026 /* Avoid using hard regs in ways which may be unsupported. */
3027 int regno = LAST_VIRTUAL_REGISTER + 1;
3028 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3029 enum node_frequency real_frequency = node->frequency;
3031 node->frequency = NODE_FREQUENCY_NORMAL;
3032 crtl->maybe_hot_insn_p = speed;
3033 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3034 start_sequence ();
3035 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3036 seq = get_insns ();
3037 end_sequence ();
3038 default_rtl_profile ();
3039 node->frequency = real_frequency;
3041 cost = seq_cost (seq, speed);
3042 if (MEM_P (rslt))
3043 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3044 TYPE_ADDR_SPACE (type), speed);
3045 else if (!REG_P (rslt))
3046 cost += set_src_cost (rslt, speed);
3048 return cost;
3051 /* Returns variable containing the value of candidate CAND at statement AT. */
3053 static tree
3054 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3056 if (stmt_after_increment (loop, cand, stmt))
3057 return cand->var_after;
3058 else
3059 return cand->var_before;
3062 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3063 same precision, which is at least as wide as the precision of TYPE, stores
3064 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3065 type of A. */
3067 static tree
3068 determine_common_wider_type (tree *a, tree *b)
3070 tree wider_type = NULL;
3071 tree suba, subb;
3072 tree atype = TREE_TYPE (*a);
3074 if (CONVERT_EXPR_P (*a))
3076 suba = TREE_OPERAND (*a, 0);
3077 wider_type = TREE_TYPE (suba);
3078 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3079 return atype;
3081 else
3082 return atype;
3084 if (CONVERT_EXPR_P (*b))
3086 subb = TREE_OPERAND (*b, 0);
3087 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3088 return atype;
3090 else
3091 return atype;
3093 *a = suba;
3094 *b = subb;
3095 return wider_type;
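/* Example (assuming long is wider than int): if *a is
   (unsigned int) x and *b is (unsigned int) y with x and y of type
   long, both casts are stripped, *a = x, *b = y, and long is
   returned, so the caller can fold x - y in the wide type before
   narrowing.  If the inner precisions disagree, *a and *b are left
   untouched and the type of *a is returned.  */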
3098 /* Determines the expression by which USE is expressed from induction variable
3099 CAND at statement AT in LOOP. The expression is stored in a decomposed
3100 form into AFF. Returns false if USE cannot be expressed using CAND. */
3102 static bool
3103 get_computation_aff (struct loop *loop,
3104 struct iv_use *use, struct iv_cand *cand, gimple at,
3105 struct aff_tree *aff)
3107 tree ubase = use->iv->base;
3108 tree ustep = use->iv->step;
3109 tree cbase = cand->iv->base;
3110 tree cstep = cand->iv->step, cstep_common;
3111 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3112 tree common_type, var;
3113 tree uutype;
3114 aff_tree cbase_aff, var_aff;
3115 widest_int rat;
3117 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3119 /* We do not have a precision to express the values of use. */
3120 return false;
3123 var = var_at_stmt (loop, cand, at);
3124 uutype = unsigned_type_for (utype);
3126 /* If the conversion is not noop, perform it. */
3127 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3129 cstep = fold_convert (uutype, cstep);
3130 cbase = fold_convert (uutype, cbase);
3131 var = fold_convert (uutype, var);
3134 if (!constant_multiple_of (ustep, cstep, &rat))
3135 return false;
3137 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3138 type, we achieve better folding by computing their difference in this
3139 wider type, and cast the result to UUTYPE. We do not need to worry about
3140 overflows, as all the arithmetic will in the end be performed in UUTYPE
3141 anyway. */
3142 common_type = determine_common_wider_type (&ubase, &cbase);
3144 /* use = ubase - ratio * cbase + ratio * var. */
3145 tree_to_aff_combination (ubase, common_type, aff);
3146 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3147 tree_to_aff_combination (var, uutype, &var_aff);
3149 /* We need to shift the value if we are after the increment. */
3150 if (stmt_after_increment (loop, cand, at))
3152 aff_tree cstep_aff;
3154 if (common_type != uutype)
3155 cstep_common = fold_convert (common_type, cstep);
3156 else
3157 cstep_common = cstep;
3159 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3160 aff_combination_add (&cbase_aff, &cstep_aff);
3163 aff_combination_scale (&cbase_aff, -rat);
3164 aff_combination_add (aff, &cbase_aff);
3165 if (common_type != uutype)
3166 aff_combination_convert (aff, uutype);
3168 aff_combination_scale (&var_aff, rat);
3169 aff_combination_add (aff, &var_aff);
3171 return true;
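/* A small instance of the decomposition above: for a use {ub, +, 4}
   and a candidate {cb, +, 2}, constant_multiple_of yields rat == 2
   and AFF represents ub - 2 * cb + 2 * var, with cb first shifted by
   the step when AT lies after the candidate's increment.  */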
3174 /* Return the type of USE. */
3176 static tree
3177 get_use_type (struct iv_use *use)
3179 tree base_type = TREE_TYPE (use->iv->base);
3180 tree type;
3182 if (use->type == USE_ADDRESS)
3184 /* The base_type may be a void pointer. Create a pointer type based on
3185 the mem_ref instead. */
3186 type = build_pointer_type (TREE_TYPE (*use->op_p));
3187 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3188 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3190 else
3191 type = base_type;
3193 return type;
3196 /* Determines the expression by which USE is expressed from induction variable
3197 CAND at statement AT in LOOP. The computation is unshared. */
3199 static tree
3200 get_computation_at (struct loop *loop,
3201 struct iv_use *use, struct iv_cand *cand, gimple at)
3203 aff_tree aff;
3204 tree type = get_use_type (use);
3206 if (!get_computation_aff (loop, use, cand, at, &aff))
3207 return NULL_TREE;
3208 unshare_aff_combination (&aff);
3209 return fold_convert (type, aff_combination_to_tree (&aff));
3212 /* Determines the expression by which USE is expressed from induction variable
3213 CAND in LOOP. The computation is unshared. */
3215 static tree
3216 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3218 return get_computation_at (loop, use, cand, use->stmt);
3221 /* Adjust the cost COST for being in loop setup rather than loop body.
3222 If we're optimizing for space, the loop setup overhead is constant;
3223 if we're optimizing for speed, amortize it over the per-iteration cost. */
3224 static unsigned
3225 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3227 if (cost == INFTY)
3228 return cost;
3229 else if (optimize_loop_for_speed_p (data->current_loop))
3230 return cost / avg_loop_niter (data->current_loop);
3231 else
3232 return cost;
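/* E.g. with an average of 10 iterations, a one-time setup cost of 20
   is charged as 20 / 10 == 2 per iteration when optimizing the loop
   for speed; when optimizing for size the full 20 is kept, since the
   setup code occupies space regardless of the trip count.  */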
3235 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3236 validity for a memory reference accessing memory of mode MODE in
3237 address space AS. */
3240 bool
3241 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3242 addr_space_t as)
3244 #define MAX_RATIO 128
3245 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3246 static vec<sbitmap> valid_mult_list;
3247 sbitmap valid_mult;
3249 if (data_index >= valid_mult_list.length ())
3250 valid_mult_list.safe_grow_cleared (data_index + 1);
3252 valid_mult = valid_mult_list[data_index];
3253 if (!valid_mult)
3255 machine_mode address_mode = targetm.addr_space.address_mode (as);
3256 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3257 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3258 rtx addr, scaled;
3259 HOST_WIDE_INT i;
3261 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3262 bitmap_clear (valid_mult);
3263 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3264 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3265 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3267 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3268 if (memory_address_addr_space_p (mode, addr, as)
3269 || memory_address_addr_space_p (mode, scaled, as))
3270 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, " allowed multipliers:");
3276 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3277 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3278 fprintf (dump_file, " %d", (int) i);
3279 fprintf (dump_file, "\n");
3280 fprintf (dump_file, "\n");
3283 valid_mult_list[data_index] = valid_mult;
3286 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3287 return false;
3289 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
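/* Example (target-dependent): on x86, base + index * scale addresses
   accept scales 1, 2, 4 and 8, so querying ratio 4 for SImode in the
   generic address space returns true while ratio 3 returns false.
   The answers are memoized per (address space, mode) in an sbitmap
   indexed by ratio + MAX_RATIO.  */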
3292 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3293 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3294 variable is omitted. Compute the cost for a memory reference that accesses
3295 a memory location of mode MEM_MODE in address space AS.
3297 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3298 size of MEM_MODE / RATIO) is available. To make this determination, we
3299 look at the size of the increment to be made, which is given in CSTEP.
3300 CSTEP may be zero if the step is unknown.
3301 STMT_AFTER_INC is true iff the statement we're looking at is after the
3302 increment of the original biv.
3304 TODO -- there must be some better way. This all is quite crude. */
3306 enum ainc_type
3308 AINC_PRE_INC, /* Pre increment. */
3309 AINC_PRE_DEC, /* Pre decrement. */
3310 AINC_POST_INC, /* Post increment. */
3311 AINC_POST_DEC, /* Post decrement. */
3312 AINC_NONE /* Also the number of auto increment types. */
3315 typedef struct address_cost_data_s
3317 HOST_WIDE_INT min_offset, max_offset;
3318 unsigned costs[2][2][2][2];
3319 unsigned ainc_costs[AINC_NONE];
3320 } *address_cost_data;
3323 static comp_cost
3324 get_address_cost (bool symbol_present, bool var_present,
3325 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3326 HOST_WIDE_INT cstep, machine_mode mem_mode,
3327 addr_space_t as, bool speed,
3328 bool stmt_after_inc, bool *may_autoinc)
3330 machine_mode address_mode = targetm.addr_space.address_mode (as);
3331 static vec<address_cost_data> address_cost_data_list;
3332 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3333 address_cost_data data;
3334 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3335 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3336 unsigned cost, acost, complexity;
3337 enum ainc_type autoinc_type;
3338 bool offset_p, ratio_p, autoinc;
3339 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3340 unsigned HOST_WIDE_INT mask;
3341 unsigned bits;
3343 if (data_index >= address_cost_data_list.length ())
3344 address_cost_data_list.safe_grow_cleared (data_index + 1);
3346 data = address_cost_data_list[data_index];
3347 if (!data)
3349 HOST_WIDE_INT i;
3350 HOST_WIDE_INT rat, off = 0;
3351 int old_cse_not_expected, width;
3352 unsigned sym_p, var_p, off_p, rat_p, add_c;
3353 rtx_insn *seq;
3354 rtx addr, base;
3355 rtx reg0, reg1;
3357 data = (address_cost_data) xcalloc (1, sizeof (*data));
3359 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3361 width = GET_MODE_BITSIZE (address_mode) - 1;
3362 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3363 width = HOST_BITS_PER_WIDE_INT - 1;
3364 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3366 for (i = width; i >= 0; i--)
3368 off = -((unsigned HOST_WIDE_INT) 1 << i);
3369 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3370 if (memory_address_addr_space_p (mem_mode, addr, as))
3371 break;
3373 data->min_offset = (i == -1? 0 : off);
3375 for (i = width; i >= 0; i--)
3377 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3378 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3379 if (memory_address_addr_space_p (mem_mode, addr, as))
3380 break;
3381 /* For some strict-alignment targets, the offset must be naturally
3382 aligned. Try an aligned offset if mem_mode is not QImode. */
3383 off = mem_mode != QImode
3384 ? ((unsigned HOST_WIDE_INT) 1 << i)
3385 - GET_MODE_SIZE (mem_mode)
3386 : 0;
3387 if (off > 0)
3389 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3390 if (memory_address_addr_space_p (mem_mode, addr, as))
3391 break;
3394 if (i == -1)
3395 off = 0;
3396 data->max_offset = off;
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, "get_address_cost:\n");
3401 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3402 GET_MODE_NAME (mem_mode),
3403 data->min_offset);
3404 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3405 GET_MODE_NAME (mem_mode),
3406 data->max_offset);
3409 rat = 1;
3410 for (i = 2; i <= MAX_RATIO; i++)
3411 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3413 rat = i;
3414 break;
3417 /* Compute the cost of various addressing modes. */
3418 acost = 0;
3419 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3420 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3422 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3423 || USE_STORE_PRE_DECREMENT (mem_mode))
3425 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3426 has_predec[mem_mode]
3427 = memory_address_addr_space_p (mem_mode, addr, as);
3429 if (has_predec[mem_mode])
3430 data->ainc_costs[AINC_PRE_DEC]
3431 = address_cost (addr, mem_mode, as, speed);
3433 if (USE_LOAD_POST_DECREMENT (mem_mode)
3434 || USE_STORE_POST_DECREMENT (mem_mode))
3436 addr = gen_rtx_POST_DEC (address_mode, reg0);
3437 has_postdec[mem_mode]
3438 = memory_address_addr_space_p (mem_mode, addr, as);
3440 if (has_postdec[mem_mode])
3441 data->ainc_costs[AINC_POST_DEC]
3442 = address_cost (addr, mem_mode, as, speed);
3444 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3445 || USE_STORE_PRE_INCREMENT (mem_mode))
3447 addr = gen_rtx_PRE_INC (address_mode, reg0);
3448 has_preinc[mem_mode]
3449 = memory_address_addr_space_p (mem_mode, addr, as);
3451 if (has_preinc[mem_mode])
3452 data->ainc_costs[AINC_PRE_INC]
3453 = address_cost (addr, mem_mode, as, speed);
3455 if (USE_LOAD_POST_INCREMENT (mem_mode)
3456 || USE_STORE_POST_INCREMENT (mem_mode))
3458 addr = gen_rtx_POST_INC (address_mode, reg0);
3459 has_postinc[mem_mode]
3460 = memory_address_addr_space_p (mem_mode, addr, as);
3462 if (has_postinc[mem_mode])
3463 data->ainc_costs[AINC_POST_INC]
3464 = address_cost (addr, mem_mode, as, speed);
3466 for (i = 0; i < 16; i++)
3468 sym_p = i & 1;
3469 var_p = (i >> 1) & 1;
3470 off_p = (i >> 2) & 1;
3471 rat_p = (i >> 3) & 1;
3473 addr = reg0;
3474 if (rat_p)
3475 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3476 gen_int_mode (rat, address_mode));
3478 if (var_p)
3479 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3481 if (sym_p)
3483 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3484 /* ??? We can run into trouble with some backends by presenting
3485 it with symbols which haven't been properly passed through
3486 targetm.encode_section_info. By setting the local bit, we
3487 enhance the probability of things working. */
3488 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3490 if (off_p)
3491 base = gen_rtx_fmt_e (CONST, address_mode,
3492 gen_rtx_fmt_ee
3493 (PLUS, address_mode, base,
3494 gen_int_mode (off, address_mode)));
3496 else if (off_p)
3497 base = gen_int_mode (off, address_mode);
3498 else
3499 base = NULL_RTX;
3501 if (base)
3502 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3504 start_sequence ();
3505 /* To avoid splitting addressing modes, pretend that no cse will
3506 follow. */
3507 old_cse_not_expected = cse_not_expected;
3508 cse_not_expected = true;
3509 addr = memory_address_addr_space (mem_mode, addr, as);
3510 cse_not_expected = old_cse_not_expected;
3511 seq = get_insns ();
3512 end_sequence ();
3514 acost = seq_cost (seq, speed);
3515 acost += address_cost (addr, mem_mode, as, speed);
3517 if (!acost)
3518 acost = 1;
3519 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3522 /* On some targets, it is quite expensive to load a symbol into a register,
3523 which makes addresses that contain symbols look much more expensive.
3524 However, the symbol will have to be loaded in any case before the
3525 loop (and quite likely we have it in register already), so it does not
3526 make much sense to penalize them too heavily. So make some final
3527 tweaks for the SYMBOL_PRESENT modes:
3529 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3530 var is cheaper, use this mode with small penalty.
3531 If VAR_PRESENT is true, try whether the mode with
3532 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3533 if this is the case, use it. */
3534 add_c = add_cost (speed, address_mode);
3535 for (i = 0; i < 8; i++)
3537 var_p = i & 1;
3538 off_p = (i >> 1) & 1;
3539 rat_p = (i >> 2) & 1;
3541 acost = data->costs[0][1][off_p][rat_p] + 1;
3542 if (var_p)
3543 acost += add_c;
3545 if (acost < data->costs[1][var_p][off_p][rat_p])
3546 data->costs[1][var_p][off_p][rat_p] = acost;
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3551 fprintf (dump_file, "Address costs:\n");
3553 for (i = 0; i < 16; i++)
3555 sym_p = i & 1;
3556 var_p = (i >> 1) & 1;
3557 off_p = (i >> 2) & 1;
3558 rat_p = (i >> 3) & 1;
3560 fprintf (dump_file, " ");
3561 if (sym_p)
3562 fprintf (dump_file, "sym + ");
3563 if (var_p)
3564 fprintf (dump_file, "var + ");
3565 if (off_p)
3566 fprintf (dump_file, "cst + ");
3567 if (rat_p)
3568 fprintf (dump_file, "rat * ");
3570 acost = data->costs[sym_p][var_p][off_p][rat_p];
3571 fprintf (dump_file, "index costs %d\n", acost);
3573 if (has_predec[mem_mode] || has_postdec[mem_mode]
3574 || has_preinc[mem_mode] || has_postinc[mem_mode])
3575 fprintf (dump_file, " May include autoinc/dec\n");
3576 fprintf (dump_file, "\n");
3579 address_cost_data_list[data_index] = data;
3582 bits = GET_MODE_BITSIZE (address_mode);
3583 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3584 offset &= mask;
3585 if ((offset >> (bits - 1) & 1))
3586 offset |= ~mask;
3587 s_offset = offset;
3589 autoinc = false;
3590 autoinc_type = AINC_NONE;
3591 msize = GET_MODE_SIZE (mem_mode);
3592 autoinc_offset = offset;
3593 if (stmt_after_inc)
3594 autoinc_offset += ratio * cstep;
3595 if (symbol_present || var_present || ratio != 1)
3596 autoinc = false;
3597 else
3599 if (has_postinc[mem_mode] && autoinc_offset == 0
3600 && msize == cstep)
3601 autoinc_type = AINC_POST_INC;
3602 else if (has_postdec[mem_mode] && autoinc_offset == 0
3603 && msize == -cstep)
3604 autoinc_type = AINC_POST_DEC;
3605 else if (has_preinc[mem_mode] && autoinc_offset == msize
3606 && msize == cstep)
3607 autoinc_type = AINC_PRE_INC;
3608 else if (has_predec[mem_mode] && autoinc_offset == -msize
3609 && msize == -cstep)
3610 autoinc_type = AINC_PRE_DEC;
3612 if (autoinc_type != AINC_NONE)
3613 autoinc = true;
3616 cost = 0;
3617 offset_p = (s_offset != 0
3618 && data->min_offset <= s_offset
3619 && s_offset <= data->max_offset);
3620 ratio_p = (ratio != 1
3621 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3623 if (ratio != 1 && !ratio_p)
3624 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3626 if (s_offset && !offset_p && !symbol_present)
3627 cost += add_cost (speed, address_mode);
3629 if (may_autoinc)
3630 *may_autoinc = autoinc;
3631 if (autoinc)
3632 acost = data->ainc_costs[autoinc_type];
3633 else
3634 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3635 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3636 return new_cost (cost + acost, complexity);
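/* Sketch of the final lookup above: an address of the shape
   sym + reg1 + 16 + reg0 * 4 has the symbol, variable, offset and
   ratio parts all present, so barring autoincrement its base cost is
   data->costs[1][1][1][1] with complexity 4, whereas a plain reg0
   costs data->costs[0][0][0][0] with complexity 0.  */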
3639 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3640 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3641 calculating the operands of EXPR. Returns true if successful, and returns
3642 the cost in COST. */
3644 static bool
3645 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3646 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3648 comp_cost res;
3649 tree op1 = TREE_OPERAND (expr, 1);
3650 tree cst = TREE_OPERAND (mult, 1);
3651 tree multop = TREE_OPERAND (mult, 0);
3652 int m = exact_log2 (int_cst_value (cst));
3653 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3654 int as_cost, sa_cost;
3655 bool mult_in_op1;
3657 if (!(m >= 0 && m < maxm))
3658 return false;
3660 mult_in_op1 = operand_equal_p (op1, mult, 0);
3662 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3664 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3665 use that in preference to a shift insn followed by an add insn. */
3666 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3667 ? shiftadd_cost (speed, mode, m)
3668 : (mult_in_op1
3669 ? shiftsub1_cost (speed, mode, m)
3670 : shiftsub0_cost (speed, mode, m)));
3672 res = new_cost (MIN (as_cost, sa_cost), 0);
3673 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3675 STRIP_NOPS (multop);
3676 if (!is_gimple_val (multop))
3677 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3679 *cost = res;
3680 return true;
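/* Worked example: for expr == a + b * 4, MULT is b * 4 and
   m == exact_log2 (4) == 2, so we compare the cost of a shift by 2
   followed by an add (as_cost) against a single shift-and-add
   instruction (sa_cost; e.g. an x86 lea computing a + b * 4), and
   charge the cheaper alternative plus the cost of the operand that
   is not multiplied.  */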
3683 /* Estimates cost of forcing expression EXPR into a variable. */
3685 static comp_cost
3686 force_expr_to_var_cost (tree expr, bool speed)
3688 static bool costs_initialized = false;
3689 static unsigned integer_cost [2];
3690 static unsigned symbol_cost [2];
3691 static unsigned address_cost [2];
3692 tree op0, op1;
3693 comp_cost cost0, cost1, cost;
3694 machine_mode mode;
3696 if (!costs_initialized)
3698 tree type = build_pointer_type (integer_type_node);
3699 tree var, addr;
3700 rtx x;
3701 int i;
3703 var = create_tmp_var_raw (integer_type_node, "test_var");
3704 TREE_STATIC (var) = 1;
3705 x = produce_memory_decl_rtl (var, NULL);
3706 SET_DECL_RTL (var, x);
3708 addr = build1 (ADDR_EXPR, type, var);
3711 for (i = 0; i < 2; i++)
3713 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3714 2000), i);
3716 symbol_cost[i] = computation_cost (addr, i) + 1;
3718 address_cost[i]
3719 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3720 if (dump_file && (dump_flags & TDF_DETAILS))
3722 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3723 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3724 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3725 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3726 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3727 fprintf (dump_file, "\n");
3731 costs_initialized = true;
3734 STRIP_NOPS (expr);
3736 if (SSA_VAR_P (expr))
3737 return no_cost;
3739 if (is_gimple_min_invariant (expr))
3741 if (TREE_CODE (expr) == INTEGER_CST)
3742 return new_cost (integer_cost [speed], 0);
3744 if (TREE_CODE (expr) == ADDR_EXPR)
3746 tree obj = TREE_OPERAND (expr, 0);
3748 if (TREE_CODE (obj) == VAR_DECL
3749 || TREE_CODE (obj) == PARM_DECL
3750 || TREE_CODE (obj) == RESULT_DECL)
3751 return new_cost (symbol_cost [speed], 0);
3754 return new_cost (address_cost [speed], 0);
3757 switch (TREE_CODE (expr))
3759 case POINTER_PLUS_EXPR:
3760 case PLUS_EXPR:
3761 case MINUS_EXPR:
3762 case MULT_EXPR:
3763 op0 = TREE_OPERAND (expr, 0);
3764 op1 = TREE_OPERAND (expr, 1);
3765 STRIP_NOPS (op0);
3766 STRIP_NOPS (op1);
3767 break;
3769 CASE_CONVERT:
3770 case NEGATE_EXPR:
3771 op0 = TREE_OPERAND (expr, 0);
3772 STRIP_NOPS (op0);
3773 op1 = NULL_TREE;
3774 break;
3776 default:
3777 /* Just an arbitrary value, FIXME. */
3778 return new_cost (target_spill_cost[speed], 0);
3781 if (op0 == NULL_TREE
3782 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3783 cost0 = no_cost;
3784 else
3785 cost0 = force_expr_to_var_cost (op0, speed);
3787 if (op1 == NULL_TREE
3788 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3789 cost1 = no_cost;
3790 else
3791 cost1 = force_expr_to_var_cost (op1, speed);
3793 mode = TYPE_MODE (TREE_TYPE (expr));
3794 switch (TREE_CODE (expr))
3796 case POINTER_PLUS_EXPR:
3797 case PLUS_EXPR:
3798 case MINUS_EXPR:
3799 case NEGATE_EXPR:
3800 cost = new_cost (add_cost (speed, mode), 0);
3801 if (TREE_CODE (expr) != NEGATE_EXPR)
3803 tree mult = NULL_TREE;
3804 comp_cost sa_cost;
3805 if (TREE_CODE (op1) == MULT_EXPR)
3806 mult = op1;
3807 else if (TREE_CODE (op0) == MULT_EXPR)
3808 mult = op0;
3810 if (mult != NULL_TREE
3811 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3812 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3813 speed, &sa_cost))
3814 return sa_cost;
3816 break;
3818 CASE_CONVERT:
3820 tree inner_mode, outer_mode;
3821 outer_mode = TREE_TYPE (expr);
3822 inner_mode = TREE_TYPE (op0);
3823 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3824 TYPE_MODE (inner_mode), speed), 0);
3826 break;
3828 case MULT_EXPR:
3829 if (cst_and_fits_in_hwi (op0))
3830 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3831 mode, speed), 0);
3832 else if (cst_and_fits_in_hwi (op1))
3833 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3834 mode, speed), 0);
3835 else
3836 return new_cost (target_spill_cost [speed], 0);
3837 break;
3839 default:
3840 gcc_unreachable ();
3843 cost = add_costs (cost, cost0);
3844 cost = add_costs (cost, cost1);
3846 /* Bound the cost by target_spill_cost. The parts of complicated
3847 computations often are either loop invariant or at least can
3848 be shared between several iv uses, so letting this grow without
3849 limits would not give reasonable results. */
3850 if (cost.cost > (int) target_spill_cost [speed])
3851 cost.cost = target_spill_cost [speed];
3853 return cost;
3856 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3857 invariants the computation depends on. */
3859 static comp_cost
3860 force_var_cost (struct ivopts_data *data,
3861 tree expr, bitmap *depends_on)
3863 if (depends_on)
3865 fd_ivopts_data = data;
3866 walk_tree (&expr, find_depends, depends_on, NULL);
3869 return force_expr_to_var_cost (expr, data->speed);
3872 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3873 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3874 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3875 invariants the computation depends on. */
3877 static comp_cost
3878 split_address_cost (struct ivopts_data *data,
3879 tree addr, bool *symbol_present, bool *var_present,
3880 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3882 tree core;
3883 HOST_WIDE_INT bitsize;
3884 HOST_WIDE_INT bitpos;
3885 tree toffset;
3886 machine_mode mode;
3887 int unsignedp, reversep, volatilep;
3889 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3890 &unsignedp, &reversep, &volatilep, false);
3892 if (toffset != 0
3893 || bitpos % BITS_PER_UNIT != 0
3894 || reversep
3895 || TREE_CODE (core) != VAR_DECL)
3897 *symbol_present = false;
3898 *var_present = true;
3899 fd_ivopts_data = data;
3900 walk_tree (&addr, find_depends, depends_on, NULL);
3901 return new_cost (target_spill_cost[data->speed], 0);
3904 *offset += bitpos / BITS_PER_UNIT;
3905 if (TREE_STATIC (core)
3906 || DECL_EXTERNAL (core))
3908 *symbol_present = true;
3909 *var_present = false;
3910 return no_cost;
3913 *symbol_present = false;
3914 *var_present = true;
3915 return no_cost;
3918 /* Estimates cost of expressing difference of addresses E1 - E2 as
3919 var + symbol + offset. The value of offset is added to OFFSET,
3920 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3921 part is missing. DEPENDS_ON is a set of the invariants the computation
3922 depends on. */
3924 static comp_cost
3925 ptr_difference_cost (struct ivopts_data *data,
3926 tree e1, tree e2, bool *symbol_present, bool *var_present,
3927 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3929 HOST_WIDE_INT diff = 0;
3930 aff_tree aff_e1, aff_e2;
3931 tree type;
3933 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3935 if (ptr_difference_const (e1, e2, &diff))
3937 *offset += diff;
3938 *symbol_present = false;
3939 *var_present = false;
3940 return no_cost;
3943 if (integer_zerop (e2))
3944 return split_address_cost (data, TREE_OPERAND (e1, 0),
3945 symbol_present, var_present, offset, depends_on);
3947 *symbol_present = false;
3948 *var_present = true;
3950 type = signed_type_for (TREE_TYPE (e1));
3951 tree_to_aff_combination (e1, type, &aff_e1);
3952 tree_to_aff_combination (e2, type, &aff_e2);
3953 aff_combination_scale (&aff_e2, -1);
3954 aff_combination_add (&aff_e1, &aff_e2);
3956 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3959 /* Estimates cost of expressing difference E1 - E2 as
3960 var + symbol + offset. The value of offset is added to OFFSET,
3961 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3962 part is missing. DEPENDS_ON is a set of the invariants the computation
3963 depends on. */
3965 static comp_cost
3966 difference_cost (struct ivopts_data *data,
3967 tree e1, tree e2, bool *symbol_present, bool *var_present,
3968 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3970 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3971 unsigned HOST_WIDE_INT off1, off2;
3972 aff_tree aff_e1, aff_e2;
3973 tree type;
3975 e1 = strip_offset (e1, &off1);
3976 e2 = strip_offset (e2, &off2);
3977 *offset += off1 - off2;
3979 STRIP_NOPS (e1);
3980 STRIP_NOPS (e2);
3982 if (TREE_CODE (e1) == ADDR_EXPR)
3983 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3984 offset, depends_on);
3985 *symbol_present = false;
3987 if (operand_equal_p (e1, e2, 0))
3989 *var_present = false;
3990 return no_cost;
3993 *var_present = true;
3995 if (integer_zerop (e2))
3996 return force_var_cost (data, e1, depends_on);
3998 if (integer_zerop (e1))
4000 comp_cost cost = force_var_cost (data, e2, depends_on);
4001 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4002 return cost;
4005 type = signed_type_for (TREE_TYPE (e1));
4006 tree_to_aff_combination (e1, type, &aff_e1);
4007 tree_to_aff_combination (e2, type, &aff_e2);
4008 aff_combination_scale (&aff_e2, -1);
4009 aff_combination_add (&aff_e1, &aff_e2);
4011 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
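/* Example (4-byte elements): for e1 == &a[8] and e2 == &a[4],
   strip_offset leaves identical cores and adds 16 to *OFFSET, so the
   difference degenerates to a constant with no symbol, no variable
   and no cost.  For e1 == &a[i] and e2 == &b[j], the fall-through
   path builds the affine combination e1 - e2 and charges the cost of
   forcing it into a variable.  */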
4014 /* Returns true if AFF1 and AFF2 are identical. */
4016 static bool
4017 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4019 unsigned i;
4021 if (aff1->n != aff2->n)
4022 return false;
4024 for (i = 0; i < aff1->n; i++)
4026 if (aff1->elts[i].coef != aff2->elts[i].coef)
4027 return false;
4029 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4030 return false;
4032 return true;
4035 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4037 static int
4038 get_expr_id (struct ivopts_data *data, tree expr)
4040 struct iv_inv_expr_ent ent;
4041 struct iv_inv_expr_ent **slot;
4043 ent.expr = expr;
4044 ent.hash = iterative_hash_expr (expr, 0);
4045 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4046 if (*slot)
4047 return (*slot)->id;
4049 *slot = XNEW (struct iv_inv_expr_ent);
4050 (*slot)->expr = expr;
4051 (*slot)->hash = ent.hash;
4052 (*slot)->id = data->inv_expr_id++;
4053 return (*slot)->id;
4056 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4057 requires a new compiler-generated temporary. Returns -1 otherwise.
4058 ADDRESS_P is a flag indicating if the expression is for address
4059 computation. */
4061 static int
4062 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4063 tree cbase, HOST_WIDE_INT ratio,
4064 bool address_p)
4066 aff_tree ubase_aff, cbase_aff;
4067 tree expr, ub, cb;
4069 STRIP_NOPS (ubase);
4070 STRIP_NOPS (cbase);
4071 ub = ubase;
4072 cb = cbase;
4074 if ((TREE_CODE (ubase) == INTEGER_CST)
4075 && (TREE_CODE (cbase) == INTEGER_CST))
4076 return -1;
4078 /* Strips the constant part. */
4079 if (TREE_CODE (ubase) == PLUS_EXPR
4080 || TREE_CODE (ubase) == MINUS_EXPR
4081 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4083 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4084 ubase = TREE_OPERAND (ubase, 0);
4087 /* Strips the constant part. */
4088 if (TREE_CODE (cbase) == PLUS_EXPR
4089 || TREE_CODE (cbase) == MINUS_EXPR
4090 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4092 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4093 cbase = TREE_OPERAND (cbase, 0);
4096 if (address_p)
4098 if (((TREE_CODE (ubase) == SSA_NAME)
4099 || (TREE_CODE (ubase) == ADDR_EXPR
4100 && is_gimple_min_invariant (ubase)))
4101 && (TREE_CODE (cbase) == INTEGER_CST))
4102 return -1;
4104 if (((TREE_CODE (cbase) == SSA_NAME)
4105 || (TREE_CODE (cbase) == ADDR_EXPR
4106 && is_gimple_min_invariant (cbase)))
4107 && (TREE_CODE (ubase) == INTEGER_CST))
4108 return -1;
4111 if (ratio == 1)
4113 if (operand_equal_p (ubase, cbase, 0))
4114 return -1;
4116 if (TREE_CODE (ubase) == ADDR_EXPR
4117 && TREE_CODE (cbase) == ADDR_EXPR)
4119 tree usym, csym;
4121 usym = TREE_OPERAND (ubase, 0);
4122 csym = TREE_OPERAND (cbase, 0);
4123 if (TREE_CODE (usym) == ARRAY_REF)
4125 tree ind = TREE_OPERAND (usym, 1);
4126 if (TREE_CODE (ind) == INTEGER_CST
4127 && tree_fits_shwi_p (ind)
4128 && tree_to_shwi (ind) == 0)
4129 usym = TREE_OPERAND (usym, 0);
4131 if (TREE_CODE (csym) == ARRAY_REF)
4133 tree ind = TREE_OPERAND (csym, 1);
4134 if (TREE_CODE (ind) == INTEGER_CST
4135 && tree_fits_shwi_p (ind)
4136 && tree_to_shwi (ind) == 0)
4137 csym = TREE_OPERAND (csym, 0);
4139 if (operand_equal_p (usym, csym, 0))
4140 return -1;
4142 /* Now do a more complex comparison. */
4143 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4144 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4145 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4146 return -1;
4149 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4150 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4152 aff_combination_scale (&cbase_aff, -1 * ratio);
4153 aff_combination_add (&ubase_aff, &cbase_aff);
4154 expr = aff_combination_to_tree (&ubase_aff);
4155 return get_expr_id (data, expr);
4160 /* Determines the cost of the computation by which USE is expressed
4161 from induction variable CAND. If ADDRESS_P is true, we just need
4162 to create an address from it, otherwise we want to get it into a
4163 register. A set of invariants we depend on is stored in
4164 DEPENDS_ON. AT is the statement at which the value is computed.
4165 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4166 addressing is likely. */
4168 static comp_cost
4169 get_computation_cost_at (struct ivopts_data *data,
4170 struct iv_use *use, struct iv_cand *cand,
4171 bool address_p, bitmap *depends_on, gimple at,
4172 bool *can_autoinc,
4173 int *inv_expr_id)
4175 tree ubase = use->iv->base, ustep = use->iv->step;
4176 tree cbase, cstep;
4177 tree utype = TREE_TYPE (ubase), ctype;
4178 unsigned HOST_WIDE_INT cstepi, offset = 0;
4179 HOST_WIDE_INT ratio, aratio;
4180 bool var_present, symbol_present, stmt_is_after_inc;
4181 comp_cost cost;
4182 widest_int rat;
4183 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4184 machine_mode mem_mode = (address_p
4185 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4186 : VOIDmode);
4188 *depends_on = NULL;
4190 /* Only consider real candidates. */
4191 if (!cand->iv)
4192 return infinite_cost;
4194 cbase = cand->iv->base;
4195 cstep = cand->iv->step;
4196 ctype = TREE_TYPE (cbase);
4198 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4200 /* We do not have enough precision to express the values of USE. */
4201 return infinite_cost;
4204 if (address_p
4205 || (use->iv->base_object
4206 && cand->iv->base_object
4207 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4208 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4210 /* Do not try to express the address of an object with a computation
4211 based on the address of a different object. This may cause problems
4212 in RTL-level alias analysis (which does not expect this to happen,
4213 as it is illegal in C), and it would be unlikely to be useful
4214 anyway. */
4215 if (use->iv->base_object
4216 && cand->iv->base_object
4217 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4218 return infinite_cost;
4221 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4223 /* TODO -- add direct handling of this case. */
4224 goto fallback;
4227 /* CSTEPI is removed from the offset in case the statement is after the
4228 increment. If the step is not constant, we use zero instead.
4229 This is a bit imprecise (there is the extra addition), but
4230 redundancy elimination is likely to transform the code so that
4231 it uses the value of the variable before the increment anyway,
4232 so this is not too unrealistic. */
4233 if (cst_and_fits_in_hwi (cstep))
4234 cstepi = int_cst_value (cstep);
4235 else
4236 cstepi = 0;
4238 if (!constant_multiple_of (ustep, cstep, &rat))
4239 return infinite_cost;
4241 if (wi::fits_shwi_p (rat))
4242 ratio = rat.to_shwi ();
4243 else
4244 return infinite_cost;
4246 STRIP_NOPS (cbase);
4247 ctype = TREE_TYPE (cbase);
4249 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4251 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4252 or ratio == 1, it is better to handle this like
4254 ubase - ratio * cbase + ratio * var
4256 (this also holds in the case ratio == -1, TODO). */
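/* For instance (illustrative numbers), with cbase == 4 and ratio == 3 the
   rewrite gives (ubase - 12) + 3 * var, so the constant folds into OFFSET
   below and only ubase remains to be costed; with ratio == 1 it gives
   (ubase - cbase) + var, which difference_cost handles directly.  */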
4258 if (cst_and_fits_in_hwi (cbase))
4260 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4261 cost = difference_cost (data,
4262 ubase, build_int_cst (utype, 0),
4263 &symbol_present, &var_present, &offset,
4264 depends_on);
4265 cost.cost /= avg_loop_niter (data->current_loop);
4267 else if (ratio == 1)
4269 tree real_cbase = cbase;
4271 /* Check to see if any adjustment is needed. */
4272 if (cstepi == 0 && stmt_is_after_inc)
4274 aff_tree real_cbase_aff;
4275 aff_tree cstep_aff;
4277 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4278 &real_cbase_aff);
4279 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4281 aff_combination_add (&real_cbase_aff, &cstep_aff);
4282 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4285 cost = difference_cost (data,
4286 ubase, real_cbase,
4287 &symbol_present, &var_present, &offset,
4288 depends_on);
4289 cost.cost /= avg_loop_niter (data->current_loop);
4291 else if (address_p
4292 && !POINTER_TYPE_P (ctype)
4293 && multiplier_allowed_in_address_p
4294 (ratio, mem_mode,
4295 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4297 cbase
4298 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4299 cost = difference_cost (data,
4300 ubase, cbase,
4301 &symbol_present, &var_present, &offset,
4302 depends_on);
4303 cost.cost /= avg_loop_niter (data->current_loop);
4305 else
4307 cost = force_var_cost (data, cbase, depends_on);
4308 cost = add_costs (cost,
4309 difference_cost (data,
4310 ubase, build_int_cst (utype, 0),
4311 &symbol_present, &var_present,
4312 &offset, depends_on));
4313 cost.cost /= avg_loop_niter (data->current_loop);
4314 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4317 if (inv_expr_id)
4319 *inv_expr_id =
4320 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4321 /* Clear depends on. */
4322 if (*inv_expr_id != -1 && depends_on && *depends_on)
4323 bitmap_clear (*depends_on);
4326 /* If we are after the increment, the value of the candidate is higher by
4327 one iteration. */
4328 if (stmt_is_after_inc)
4329 offset -= ratio * cstepi;
4331 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4332 (symbol/var1/const parts may be omitted). If we are looking for an
4333 address, find the cost of addressing this. */
4334 if (address_p)
4335 return add_costs (cost,
4336 get_address_cost (symbol_present, var_present,
4337 offset, ratio, cstepi,
4338 mem_mode,
4339 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4340 speed, stmt_is_after_inc,
4341 can_autoinc));
4343 /* Otherwise estimate the costs for computing the expression. */
4344 if (!symbol_present && !var_present && !offset)
4346 if (ratio != 1)
4347 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4348 return cost;
4351 /* Symbol + offset should be compile-time computable so consider that they
4352 are added once to the variable, if present. */
4353 if (var_present && (symbol_present || offset))
4354 cost.cost += adjust_setup_cost (data,
4355 add_cost (speed, TYPE_MODE (ctype)));
4357 /* Having offset does not affect runtime cost in case it is added to
4358 symbol, but it increases complexity. */
4359 if (offset)
4360 cost.complexity++;
4362 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4364 aratio = ratio > 0 ? ratio : -ratio;
4365 if (aratio != 1)
4366 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4367 return cost;
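/* E.g. for the shape sym + 8 + 2 * var2 (an illustrative case): the
   multiplication by 2 is costed by mult_by_coeff_cost, one addition
   combines the product with the compile-time part sym + 8, and the
   offset 8 only bumps the complexity, since it folds into the symbol.  */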
4369 fallback:
4370 if (can_autoinc)
4371 *can_autoinc = false;
4374 /* Just get the expression, expand it and measure the cost. */
4375 tree comp = get_computation_at (data->current_loop, use, cand, at);
4377 if (!comp)
4378 return infinite_cost;
4380 if (address_p)
4381 comp = build_simple_mem_ref (comp);
4383 return new_cost (computation_cost (comp, speed), 0);
4387 /* Determines the cost of the computation by which USE is expressed
4388 from induction variable CAND. If ADDRESS_P is true, we just need
4389 to create an address from it, otherwise we want to get it into
4390 a register. A set of invariants we depend on is stored in
4391 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4392 autoinc addressing is likely. */
4394 static comp_cost
4395 get_computation_cost (struct ivopts_data *data,
4396 struct iv_use *use, struct iv_cand *cand,
4397 bool address_p, bitmap *depends_on,
4398 bool *can_autoinc, int *inv_expr_id)
4400 return get_computation_cost_at (data,
4401 use, cand, address_p, depends_on, use->stmt,
4402 can_autoinc, inv_expr_id);
4405 /* Determines cost of basing replacement of USE on CAND in a generic
4406 expression. */
4408 static bool
4409 determine_use_iv_cost_generic (struct ivopts_data *data,
4410 struct iv_use *use, struct iv_cand *cand)
4412 bitmap depends_on;
4413 comp_cost cost;
4414 int inv_expr_id = -1;
4416 /* The simple case first -- if we need to express value of the preserved
4417 original biv, the cost is 0. This also prevents us from counting the
4418 cost of the increment twice -- once at this use and once in the cost of
4419 the candidate. */
4420 if (cand->pos == IP_ORIGINAL
4421 && cand->incremented_at == use->stmt)
4423 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4424 ERROR_MARK, -1);
4425 return true;
4428 cost = get_computation_cost (data, use, cand, false, &depends_on,
4429 NULL, &inv_expr_id);
4431 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4432 inv_expr_id);
4434 return !infinite_cost_p (cost);
4437 /* Determines cost of basing replacement of USE on CAND in an address. */
4439 static bool
4440 determine_use_iv_cost_address (struct ivopts_data *data,
4441 struct iv_use *use, struct iv_cand *cand)
4443 bitmap depends_on;
4444 bool can_autoinc;
4445 int inv_expr_id = -1;
4446 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4447 &can_autoinc, &inv_expr_id);
4449 if (cand->ainc_use == use)
4451 if (can_autoinc)
4452 cost.cost -= cand->cost_step;
4453 /* If we generated the candidate solely for exploiting autoincrement
4454 opportunities, and it turns out it can't be used, set the cost to
4455 infinity to make sure we ignore it. */
4456 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4457 cost = infinite_cost;
4459 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4460 inv_expr_id);
4462 return !infinite_cost_p (cost);
4465 /* Computes value of candidate CAND at position AT in iteration NITER, and
4466 stores it to VAL. */
4468 static void
4469 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4470 aff_tree *val)
4472 aff_tree step, delta, nit;
4473 struct iv *iv = cand->iv;
4474 tree type = TREE_TYPE (iv->base);
4475 tree steptype = type;
4476 if (POINTER_TYPE_P (type))
4477 steptype = sizetype;
4478 steptype = unsigned_type_for (type);
4480 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4481 aff_combination_convert (&step, steptype);
4482 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4483 aff_combination_convert (&nit, steptype);
4484 aff_combination_mult (&nit, &step, &delta);
4485 if (stmt_after_increment (loop, cand, at))
4486 aff_combination_add (&delta, &step);
4488 tree_to_aff_combination (iv->base, type, val);
4489 if (!POINTER_TYPE_P (type))
4490 aff_combination_convert (val, steptype);
4491 aff_combination_add (val, &delta);
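/* In other words, VAL = BASE + STEP * NITER, plus one more STEP when AT
   is after the increment: e.g. base 0, step 4 and niter 10 (illustrative
   values) yield 40, or 44 past the increment.  */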
4494 /* Returns the period of induction variable IV. */
4496 static tree
4497 iv_period (struct iv *iv)
4499 tree step = iv->step, period, type;
4500 tree pow2div;
4502 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4504 type = unsigned_type_for (TREE_TYPE (step));
4505 /* The period of the iv is lcm (step, type_range) / step - 1,
4506 i.e., N * type_range / step - 1. Since the type range is a power
4507 of two, N == step >> num_of_ending_zeros_binary (step),
4508 so the final result is
4510 (type_range >> num_of_ending_zeros_binary (step)) - 1
4512 */
4513 pow2div = num_ending_zeros (step);
4515 period = build_low_bits_mask (type,
4516 (TYPE_PRECISION (type)
4517 - tree_to_uhwi (pow2div)));
4519 return period;
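/* For example, a 32-bit unsigned iv with step 12 has two trailing zero
   bits in the step, so the period is (1 << 30) - 1 == 0x3fffffff: the iv
   runs through 2^30 distinct values before it repeats.  */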
4522 /* Returns the comparison operator used when eliminating the iv USE. */
4524 static enum tree_code
4525 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4527 struct loop *loop = data->current_loop;
4528 basic_block ex_bb;
4529 edge exit;
4531 ex_bb = gimple_bb (use->stmt);
4532 exit = EDGE_SUCC (ex_bb, 0);
4533 if (flow_bb_inside_loop_p (loop, exit->dest))
4534 exit = EDGE_SUCC (ex_bb, 1);
4536 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
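/* That is, when the exit edge is taken on a true condition, the eliminated
   test becomes CAND == BOUND; otherwise CAND != BOUND.  */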
4539 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4540 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4541 calculation is performed in a non-wrapping type.
4543 TODO: More generally, we could test for the situation that
4544 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4545 This would require knowing the sign of OFFSET. */
4547 static bool
4548 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4550 enum tree_code code;
4551 tree e1, e2;
4552 aff_tree aff_e1, aff_e2, aff_offset;
4554 if (!nowrap_type_p (TREE_TYPE (base)))
4555 return false;
4557 base = expand_simple_operations (base);
4559 if (TREE_CODE (base) == SSA_NAME)
4561 gimple stmt = SSA_NAME_DEF_STMT (base);
4563 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4564 return false;
4566 code = gimple_assign_rhs_code (stmt);
4567 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4568 return false;
4570 e1 = gimple_assign_rhs1 (stmt);
4571 e2 = gimple_assign_rhs2 (stmt);
4573 else
4575 code = TREE_CODE (base);
4576 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4577 return false;
4578 e1 = TREE_OPERAND (base, 0);
4579 e2 = TREE_OPERAND (base, 1);
4582 /* Use affine expansion as a deeper inspection to prove the equality. */
4583 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4584 &aff_e2, &data->name_expansion_cache);
4585 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4586 &aff_offset, &data->name_expansion_cache);
4587 aff_combination_scale (&aff_offset, -1);
4588 switch (code)
4590 case PLUS_EXPR:
4591 aff_combination_add (&aff_e2, &aff_offset);
4592 if (aff_combination_zero_p (&aff_e2))
4593 return true;
4595 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4596 &aff_e1, &data->name_expansion_cache);
4597 aff_combination_add (&aff_e1, &aff_offset);
4598 return aff_combination_zero_p (&aff_e1);
4600 case POINTER_PLUS_EXPR:
4601 aff_combination_add (&aff_e2, &aff_offset);
4602 return aff_combination_zero_p (&aff_e2);
4604 default:
4605 return false;
4609 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4610 comparison with CAND. NITER describes the number of iterations of
4611 the loops. If successful, the comparison in COMP_P is altered accordingly.
4613 We aim to handle the following situation:
4615 sometype *base, *p;
4616 int a, b, i;
4618 i = a;
4619 p = p_0 = base + a;
4621 do
4622 {
4623 bla (*p);
4624 p++;
4625 i++;
4626 }
4627 while (i < b);
4629 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4630 We aim to optimize this to
4632 p = p_0 = base + a;
4633 do
4634 {
4635 bla (*p);
4636 p++;
4637 }
4638 while (p < p_0 - a + b);
4640 This preserves correctness, since the pointer arithmetic does not
4641 overflow. More precisely:
4643 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4644 overflow in computing it or the values of p.
4645 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4646 overflow. To prove this, we use the fact that p_0 = base + a. */
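/* As a concrete instance (arbitrary values), take a == 2 and b == 7: the
   body runs five times with p in [base + 2, base + 6], the new test exits
   once p reaches p_0 - a + b == base + 7, and the b - a - 1 == 4 latch
   iterations are preserved exactly.  */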
4648 static bool
4649 iv_elimination_compare_lt (struct ivopts_data *data,
4650 struct iv_cand *cand, enum tree_code *comp_p,
4651 struct tree_niter_desc *niter)
4653 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4654 struct aff_tree nit, tmpa, tmpb;
4655 enum tree_code comp;
4656 HOST_WIDE_INT step;
4658 /* We need to know that the candidate induction variable does not overflow.
4659 While more complex analysis may be used to prove this, for now just
4660 check that the variable appears in the original program and that it
4661 is computed in a type that guarantees no overflows. */
4662 cand_type = TREE_TYPE (cand->iv->base);
4663 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4664 return false;
4666 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4667 the calculation of the BOUND could overflow, making the comparison
4668 invalid. */
4669 if (!data->loop_single_exit_p)
4670 return false;
4672 /* We need to be able to decide whether the candidate is increasing or decreasing
4673 in order to choose the right comparison operator. */
4674 if (!cst_and_fits_in_hwi (cand->iv->step))
4675 return false;
4676 step = int_cst_value (cand->iv->step);
4678 /* Check that the number of iterations matches the expected pattern:
4679 a + 1 > b ? 0 : b - a - 1. */
4680 mbz = niter->may_be_zero;
4681 if (TREE_CODE (mbz) == GT_EXPR)
4683 /* Handle a + 1 > b. */
4684 tree op0 = TREE_OPERAND (mbz, 0);
4685 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4687 a = TREE_OPERAND (op0, 0);
4688 b = TREE_OPERAND (mbz, 1);
4690 else
4691 return false;
4693 else if (TREE_CODE (mbz) == LT_EXPR)
4695 tree op1 = TREE_OPERAND (mbz, 1);
4697 /* Handle b < a + 1. */
4698 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4700 a = TREE_OPERAND (op1, 0);
4701 b = TREE_OPERAND (mbz, 0);
4703 else
4704 return false;
4706 else
4707 return false;
4709 /* Expected number of iterations is B - A - 1. Check that it matches
4710 the actual number, i.e., that B - A - NITER = 1. */
4711 tree_to_aff_combination (niter->niter, nit_type, &nit);
4712 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4713 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4714 aff_combination_scale (&nit, -1);
4715 aff_combination_scale (&tmpa, -1);
4716 aff_combination_add (&tmpb, &tmpa);
4717 aff_combination_add (&tmpb, &nit);
4718 if (tmpb.n != 0 || tmpb.offset != 1)
4719 return false;
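/* Continuing the example above: a == 2, b == 7 and niter == 4 leave
   tmpb == b - a - niter == 1 with no variable part, so the expected
   shape of the iteration count is confirmed.  */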
4721 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4722 overflow. */
4723 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4724 cand->iv->step,
4725 fold_convert (TREE_TYPE (cand->iv->step), a));
4726 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4727 return false;
4729 /* Determine the new comparison operator. */
4730 comp = step < 0 ? GT_EXPR : LT_EXPR;
4731 if (*comp_p == NE_EXPR)
4732 *comp_p = comp;
4733 else if (*comp_p == EQ_EXPR)
4734 *comp_p = invert_tree_comparison (comp, false);
4735 else
4736 gcc_unreachable ();
4738 return true;
4741 /* Check whether it is possible to express the condition in USE by comparison
4742 of candidate CAND. If so, store the value compared with to BOUND, and the
4743 comparison operator to COMP. */
4745 static bool
4746 may_eliminate_iv (struct ivopts_data *data,
4747 struct iv_use *use, struct iv_cand *cand, tree *bound,
4748 enum tree_code *comp)
4750 basic_block ex_bb;
4751 edge exit;
4752 tree period;
4753 struct loop *loop = data->current_loop;
4754 aff_tree bnd;
4755 struct tree_niter_desc *desc = NULL;
4757 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4758 return false;
4760 /* For now this works only for exits that dominate the loop latch.
4761 TODO: extend to other conditions inside loop body. */
4762 ex_bb = gimple_bb (use->stmt);
4763 if (use->stmt != last_stmt (ex_bb)
4764 || gimple_code (use->stmt) != GIMPLE_COND
4765 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4766 return false;
4768 exit = EDGE_SUCC (ex_bb, 0);
4769 if (flow_bb_inside_loop_p (loop, exit->dest))
4770 exit = EDGE_SUCC (ex_bb, 1);
4771 if (flow_bb_inside_loop_p (loop, exit->dest))
4772 return false;
4774 desc = niter_for_exit (data, exit);
4775 if (!desc)
4776 return false;
4778 /* Determine whether we can use the variable to test the exit condition.
4779 This is the case iff the period of the induction variable is greater
4780 than the number of iterations for which the exit condition is true. */
4781 period = iv_period (cand->iv);
4783 /* If the number of iterations is constant, compare against it directly. */
4784 if (TREE_CODE (desc->niter) == INTEGER_CST)
4786 /* See cand_value_at. */
4787 if (stmt_after_increment (loop, cand, use->stmt))
4789 if (!tree_int_cst_lt (desc->niter, period))
4790 return false;
4792 else
4794 if (tree_int_cst_lt (period, desc->niter))
4795 return false;
4799 /* If not, and if this is the only possible exit of the loop, see whether
4800 we can get a conservative estimate on the number of iterations of the
4801 entire loop and compare against that instead. */
4802 else
4804 widest_int period_value, max_niter;
4806 max_niter = desc->max;
4807 if (stmt_after_increment (loop, cand, use->stmt))
4808 max_niter += 1;
4809 period_value = wi::to_widest (period);
4810 if (wi::gtu_p (max_niter, period_value))
4812 /* See if we can take advantage of inferred loop bound information. */
4813 if (data->loop_single_exit_p)
4815 if (!max_loop_iterations (loop, &max_niter))
4816 return false;
4817 /* The loop bound is already adjusted by adding 1. */
4818 if (wi::gtu_p (max_niter, period_value))
4819 return false;
4821 else
4822 return false;
4826 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4828 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4829 aff_combination_to_tree (&bnd));
4830 *comp = iv_elimination_compare (data, use);
4832 /* It is unlikely that computing the number of iterations using division
4833 would be more profitable than keeping the original induction variable. */
4834 if (expression_expensive_p (*bound))
4835 return false;
4837 /* Sometimes, it is possible to handle the situation that the number of
4838 iterations may be zero unless additional assumptions hold, by using <
4839 instead of != in the exit condition.
4841 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4842 base the exit condition on it. However, that is often too
4843 expensive. */
4844 if (!integer_zerop (desc->may_be_zero))
4845 return iv_elimination_compare_lt (data, cand, comp, desc);
4847 return true;
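/* To summarize with a sketch (not from the original source): for a loop
   "do { ... p++; i++; } while (i < n);" with candidate p, cand_value_at
   evaluates p after DESC->niter iterations to produce BOUND, and the
   exit test on i is replaced by comparing p against BOUND; the period
   check above guarantees p cannot wrap around and hit BOUND early.  */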
4850 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4851 be copied, if it is used in the loop body and DATA->body_includes_call. */
4853 static int
4854 parm_decl_cost (struct ivopts_data *data, tree bound)
4856 tree sbound = bound;
4857 STRIP_NOPS (sbound);
4859 if (TREE_CODE (sbound) == SSA_NAME
4860 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4861 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4862 && data->body_includes_call)
4863 return COSTS_N_INSNS (1);
4865 return 0;
4868 /* Determines cost of basing replacement of USE on CAND in a condition. */
4870 static bool
4871 determine_use_iv_cost_condition (struct ivopts_data *data,
4872 struct iv_use *use, struct iv_cand *cand)
4874 tree bound = NULL_TREE;
4875 struct iv *cmp_iv;
4876 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4877 comp_cost elim_cost, express_cost, cost, bound_cost;
4878 bool ok;
4879 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4880 tree *control_var, *bound_cst;
4881 enum tree_code comp = ERROR_MARK;
4883 /* Only consider real candidates. */
4884 if (!cand->iv)
4886 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4887 ERROR_MARK, -1);
4888 return false;
4891 /* Try iv elimination. */
4892 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4894 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4895 if (elim_cost.cost == 0)
4896 elim_cost.cost = parm_decl_cost (data, bound);
4897 else if (TREE_CODE (bound) == INTEGER_CST)
4898 elim_cost.cost = 0;
4899 /* If we replace a loop condition 'i < n' with 'p < base + n',
4900 depends_on_elim will have 'base' and 'n' set, which implies
4901 that both 'base' and 'n' will be live during the loop. More likely,
4902 'base + n' will be loop invariant, resulting in only one live value
4903 during the loop. So in that case we clear depends_on_elim and set
4904 elim_inv_expr_id instead. */
4905 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4907 elim_inv_expr_id = get_expr_id (data, bound);
4908 bitmap_clear (depends_on_elim);
4910 /* The bound is a loop invariant, so it will only be computed
4911 once. */
4912 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4914 else
4915 elim_cost = infinite_cost;
4917 /* Try expressing the original giv. If it is compared with an invariant,
4918 note that we cannot get rid of it. */
4919 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4920 NULL, &cmp_iv);
4921 gcc_assert (ok);
4923 /* When the condition is a comparison of the candidate IV against
4924 zero, prefer this IV.
4926 TODO: The constant that we're subtracting from the cost should
4927 be target-dependent. This information should be added to the
4928 target costs for each backend. */
4929 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4930 && integer_zerop (*bound_cst)
4931 && (operand_equal_p (*control_var, cand->var_after, 0)
4932 || operand_equal_p (*control_var, cand->var_before, 0)))
4933 elim_cost.cost -= 1;
4935 express_cost = get_computation_cost (data, use, cand, false,
4936 &depends_on_express, NULL,
4937 &express_inv_expr_id);
4938 fd_ivopts_data = data;
4939 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4941 /* Count the cost of the original bound as well. */
4942 bound_cost = force_var_cost (data, *bound_cst, NULL);
4943 if (bound_cost.cost == 0)
4944 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4945 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4946 bound_cost.cost = 0;
4947 express_cost.cost += bound_cost.cost;
4949 /* Choose the better approach, preferring the eliminated IV. */
4950 if (compare_costs (elim_cost, express_cost) <= 0)
4952 cost = elim_cost;
4953 depends_on = depends_on_elim;
4954 depends_on_elim = NULL;
4955 inv_expr_id = elim_inv_expr_id;
4957 else
4959 cost = express_cost;
4960 depends_on = depends_on_express;
4961 depends_on_express = NULL;
4962 bound = NULL_TREE;
4963 comp = ERROR_MARK;
4964 inv_expr_id = express_inv_expr_id;
4967 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4969 if (depends_on_elim)
4970 BITMAP_FREE (depends_on_elim);
4971 if (depends_on_express)
4972 BITMAP_FREE (depends_on_express);
4974 return !infinite_cost_p (cost);
4977 /* Determines cost of basing replacement of USE on CAND. Returns false
4978 if USE cannot be based on CAND. */
4980 static bool
4981 determine_use_iv_cost (struct ivopts_data *data,
4982 struct iv_use *use, struct iv_cand *cand)
4984 switch (use->type)
4986 case USE_NONLINEAR_EXPR:
4987 return determine_use_iv_cost_generic (data, use, cand);
4989 case USE_ADDRESS:
4990 return determine_use_iv_cost_address (data, use, cand);
4992 case USE_COMPARE:
4993 return determine_use_iv_cost_condition (data, use, cand);
4995 default:
4996 gcc_unreachable ();
5000 /* Return true if get_computation_cost indicates that autoincrement is
5001 a possibility for the pair of USE and CAND, false otherwise. */
5003 static bool
5004 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5005 struct iv_cand *cand)
5007 bitmap depends_on;
5008 bool can_autoinc;
5009 comp_cost cost;
5011 if (use->type != USE_ADDRESS)
5012 return false;
5014 cost = get_computation_cost (data, use, cand, true, &depends_on,
5015 &can_autoinc, NULL);
5017 BITMAP_FREE (depends_on);
5019 return !infinite_cost_p (cost) && can_autoinc;
5022 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5023 use that allows autoincrement, and set their AINC_USE if possible. */
5025 static void
5026 set_autoinc_for_original_candidates (struct ivopts_data *data)
5028 unsigned i, j;
5030 for (i = 0; i < n_iv_cands (data); i++)
5032 struct iv_cand *cand = iv_cand (data, i);
5033 struct iv_use *closest_before = NULL;
5034 struct iv_use *closest_after = NULL;
5035 if (cand->pos != IP_ORIGINAL)
5036 continue;
5038 for (j = 0; j < n_iv_uses (data); j++)
5040 struct iv_use *use = iv_use (data, j);
5041 unsigned uid = gimple_uid (use->stmt);
5043 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5044 continue;
5046 if (uid < gimple_uid (cand->incremented_at)
5047 && (closest_before == NULL
5048 || uid > gimple_uid (closest_before->stmt)))
5049 closest_before = use;
5051 if (uid > gimple_uid (cand->incremented_at)
5052 && (closest_after == NULL
5053 || uid < gimple_uid (closest_after->stmt)))
5054 closest_after = use;
5057 if (closest_before != NULL
5058 && autoinc_possible_for_pair (data, closest_before, cand))
5059 cand->ainc_use = closest_before;
5060 else if (closest_after != NULL
5061 && autoinc_possible_for_pair (data, closest_after, cand))
5062 cand->ainc_use = closest_after;
5066 /* Finds the candidates for the induction variables. */
5068 static void
5069 find_iv_candidates (struct ivopts_data *data)
5071 /* Add commonly used ivs. */
5072 add_standard_iv_candidates (data);
5074 /* Add old induction variables. */
5075 add_old_ivs_candidates (data);
5077 /* Add induction variables derived from uses. */
5078 add_derived_ivs_candidates (data);
5080 set_autoinc_for_original_candidates (data);
5082 /* Record the important candidates. */
5083 record_important_candidates (data);
5086 /* Determines costs of basing the use of the iv on an iv candidate. */
5088 static void
5089 determine_use_iv_costs (struct ivopts_data *data)
5091 unsigned i, j;
5092 struct iv_use *use;
5093 struct iv_cand *cand;
5094 bitmap to_clear = BITMAP_ALLOC (NULL);
5096 alloc_use_cost_map (data);
5098 for (i = 0; i < n_iv_uses (data); i++)
5100 use = iv_use (data, i);
5102 if (data->consider_all_candidates)
5104 for (j = 0; j < n_iv_cands (data); j++)
5106 cand = iv_cand (data, j);
5107 determine_use_iv_cost (data, use, cand);
5110 else
5112 bitmap_iterator bi;
5114 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5116 cand = iv_cand (data, j);
5117 if (!determine_use_iv_cost (data, use, cand))
5118 bitmap_set_bit (to_clear, j);
5121 /* Remove the candidates for which the cost is infinite from
5122 the list of related candidates. */
5123 bitmap_and_compl_into (use->related_cands, to_clear);
5124 bitmap_clear (to_clear);
5128 BITMAP_FREE (to_clear);
5130 if (dump_file && (dump_flags & TDF_DETAILS))
5132 fprintf (dump_file, "Use-candidate costs:\n");
5134 for (i = 0; i < n_iv_uses (data); i++)
5136 use = iv_use (data, i);
5138 fprintf (dump_file, "Use %d:\n", i);
5139 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5140 for (j = 0; j < use->n_map_members; j++)
5142 if (!use->cost_map[j].cand
5143 || infinite_cost_p (use->cost_map[j].cost))
5144 continue;
5146 fprintf (dump_file, " %d\t%d\t%d\t",
5147 use->cost_map[j].cand->id,
5148 use->cost_map[j].cost.cost,
5149 use->cost_map[j].cost.complexity);
5150 if (use->cost_map[j].depends_on)
5151 bitmap_print (dump_file,
5152 use->cost_map[j].depends_on, "","");
5153 if (use->cost_map[j].inv_expr_id != -1)
5154 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5155 fprintf (dump_file, "\n");
5158 fprintf (dump_file, "\n");
5160 fprintf (dump_file, "\n");
5164 /* Determines cost of the candidate CAND. */
5166 static void
5167 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5169 comp_cost cost_base;
5170 unsigned cost, cost_step;
5171 tree base;
5173 if (!cand->iv)
5175 cand->cost = 0;
5176 return;
5179 /* There are two costs associated with the candidate -- its increment
5180 and its initialization. The second is almost negligible for any loop
5181 that rolls enough, so we give it only very little weight. */
5183 base = cand->iv->base;
5184 cost_base = force_var_cost (data, base, NULL);
5185 /* Only rarely will the iv register happen to be initialized with
5186 the proper value at no cost. In general, there will at least be a regcopy
5187 or a const set. */
5188 if (cost_base.cost == 0)
5189 cost_base.cost = COSTS_N_INSNS (1);
5190 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5192 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5194 /* Prefer the original ivs unless we may gain something by replacing them.
5195 The reason is to make debugging simpler, so this is not relevant for
5196 artificial ivs created by other optimization passes. */
5197 if (cand->pos != IP_ORIGINAL
5198 || !SSA_NAME_VAR (cand->var_before)
5199 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5200 cost++;
5202 /* Prefer not to insert statements into latch unless there are some
5203 already (so that we do not create unnecessary jumps). */
5204 if (cand->pos == IP_END
5205 && empty_block_p (ip_end_pos (data->current_loop)))
5206 cost++;
5208 cand->cost = cost;
5209 cand->cost_step = cost_step;
5212 /* Determines costs of computation of the candidates. */
5214 static void
5215 determine_iv_costs (struct ivopts_data *data)
5217 unsigned i;
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5221 fprintf (dump_file, "Candidate costs:\n");
5222 fprintf (dump_file, " cand\tcost\n");
5225 for (i = 0; i < n_iv_cands (data); i++)
5227 struct iv_cand *cand = iv_cand (data, i);
5229 determine_iv_cost (data, cand);
5231 if (dump_file && (dump_flags & TDF_DETAILS))
5232 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5235 if (dump_file && (dump_flags & TDF_DETAILS))
5236 fprintf (dump_file, "\n");
5239 /* Calculates cost for having SIZE induction variables. */
5241 static unsigned
5242 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5244 /* We add size to the cost, so that we prefer eliminating ivs
5245 if possible. */
5246 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5247 data->body_includes_call);
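/* E.g. a set needing three registers (an illustrative value) costs 3 plus
   whatever spill/pressure cost the target estimates for three extra live
   values; the flat "+ size" term biases the search toward smaller sets
   even when register pressure is not an issue.  */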
5250 /* For each size of the induction variable set determine the penalty. */
5252 static void
5253 determine_set_costs (struct ivopts_data *data)
5255 unsigned j, n;
5256 gphi *phi;
5257 gphi_iterator psi;
5258 tree op;
5259 struct loop *loop = data->current_loop;
5260 bitmap_iterator bi;
5262 if (dump_file && (dump_flags & TDF_DETAILS))
5264 fprintf (dump_file, "Global costs:\n");
5265 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5266 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5267 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5268 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5271 n = 0;
5272 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5274 phi = psi.phi ();
5275 op = PHI_RESULT (phi);
5277 if (virtual_operand_p (op))
5278 continue;
5280 if (get_iv (data, op))
5281 continue;
5283 n++;
5286 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5288 struct version_info *info = ver_info (data, j);
5290 if (info->inv_id && info->has_nonlin_use)
5291 n++;
5294 data->regs_used = n;
5295 if (dump_file && (dump_flags & TDF_DETAILS))
5296 fprintf (dump_file, " regs_used %d\n", n);
5298 if (dump_file && (dump_flags & TDF_DETAILS))
5300 fprintf (dump_file, " cost for size:\n");
5301 fprintf (dump_file, " ivs\tcost\n");
5302 for (j = 0; j <= 2 * target_avail_regs; j++)
5303 fprintf (dump_file, " %d\t%d\n", j,
5304 ivopts_global_cost_for_size (data, j));
5305 fprintf (dump_file, "\n");
5309 /* Returns true if A is a cheaper cost pair than B. */
5311 static bool
5312 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5314 int cmp;
5316 if (!a)
5317 return false;
5319 if (!b)
5320 return true;
5322 cmp = compare_costs (a->cost, b->cost);
5323 if (cmp < 0)
5324 return true;
5326 if (cmp > 0)
5327 return false;
5329 /* In case the costs are the same, prefer the cheaper candidate. */
5330 if (a->cand->cost < b->cand->cost)
5331 return true;
5333 return false;
5337 /* Returns the candidate by which USE is expressed in IVS. */
5339 static struct cost_pair *
5340 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5342 return ivs->cand_for_use[use->id];
5345 /* Computes the cost field of IVS structure. */
5347 static void
5348 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5350 comp_cost cost = ivs->cand_use_cost;
5352 cost.cost += ivs->cand_cost;
5354 cost.cost += ivopts_global_cost_for_size (data,
5355 ivs->n_regs + ivs->num_used_inv_expr);
5357 ivs->cost = cost;
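/* So the total is cand_use_cost + cand_cost + the global size cost:
   e.g. (illustrative numbers) uses costing 12, two candidates of cost 5
   each, three registers and one shared invariant expression give
   12 + 10 + ivopts_global_cost_for_size (data, 3 + 1).  */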
5360 /* Remove the invariants in set INVS from set IVS. */
5362 static void
5363 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5365 bitmap_iterator bi;
5366 unsigned iid;
5368 if (!invs)
5369 return;
5371 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5373 ivs->n_invariant_uses[iid]--;
5374 if (ivs->n_invariant_uses[iid] == 0)
5375 ivs->n_regs--;
5379 /* Set USE not to be expressed by any candidate in IVS. */
5381 static void
5382 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5383 struct iv_use *use)
5385 unsigned uid = use->id, cid;
5386 struct cost_pair *cp;
5388 cp = ivs->cand_for_use[uid];
5389 if (!cp)
5390 return;
5391 cid = cp->cand->id;
5393 ivs->bad_uses++;
5394 ivs->cand_for_use[uid] = NULL;
5395 ivs->n_cand_uses[cid]--;
5397 if (ivs->n_cand_uses[cid] == 0)
5399 bitmap_clear_bit (ivs->cands, cid);
5400 /* Do not count the pseudocandidates. */
5401 if (cp->cand->iv)
5402 ivs->n_regs--;
5403 ivs->n_cands--;
5404 ivs->cand_cost -= cp->cand->cost;
5406 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5409 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5411 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5413 if (cp->inv_expr_id != -1)
5415 ivs->used_inv_expr[cp->inv_expr_id]--;
5416 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5417 ivs->num_used_inv_expr--;
5419 iv_ca_recount_cost (data, ivs);
5422 /* Add the invariants in set INVS to set IVS. */
5424 static void
5425 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5427 bitmap_iterator bi;
5428 unsigned iid;
5430 if (!invs)
5431 return;
5433 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5435 ivs->n_invariant_uses[iid]++;
5436 if (ivs->n_invariant_uses[iid] == 1)
5437 ivs->n_regs++;
5441 /* Set cost pair for USE in set IVS to CP. */
5443 static void
5444 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5445 struct iv_use *use, struct cost_pair *cp)
5447 unsigned uid = use->id, cid;
5449 if (ivs->cand_for_use[uid] == cp)
5450 return;
5452 if (ivs->cand_for_use[uid])
5453 iv_ca_set_no_cp (data, ivs, use);
5455 if (cp)
5457 cid = cp->cand->id;
5459 ivs->bad_uses--;
5460 ivs->cand_for_use[uid] = cp;
5461 ivs->n_cand_uses[cid]++;
5462 if (ivs->n_cand_uses[cid] == 1)
5464 bitmap_set_bit (ivs->cands, cid);
5465 /* Do not count the pseudocandidates. */
5466 if (cp->cand->iv)
5467 ivs->n_regs++;
5468 ivs->n_cands++;
5469 ivs->cand_cost += cp->cand->cost;
5471 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5474 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5475 iv_ca_set_add_invariants (ivs, cp->depends_on);
5477 if (cp->inv_expr_id != -1)
5479 ivs->used_inv_expr[cp->inv_expr_id]++;
5480 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5481 ivs->num_used_inv_expr++;
5483 iv_ca_recount_cost (data, ivs);
5487 /* Extend set IVS by expressing USE by some of the candidates in it
5488 if possible. Consider all important candidates if candidates in
5489 set IVS don't give any result. */
5491 static void
5492 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5493 struct iv_use *use)
5495 struct cost_pair *best_cp = NULL, *cp;
5496 bitmap_iterator bi;
5497 unsigned i;
5498 struct iv_cand *cand;
5500 gcc_assert (ivs->upto >= use->id);
5501 ivs->upto++;
5502 ivs->bad_uses++;
5504 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5506 cand = iv_cand (data, i);
5507 cp = get_use_iv_cost (data, use, cand);
5508 if (cheaper_cost_pair (cp, best_cp))
5509 best_cp = cp;
5512 if (best_cp == NULL)
5514 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5516 cand = iv_cand (data, i);
5517 cp = get_use_iv_cost (data, use, cand);
5518 if (cheaper_cost_pair (cp, best_cp))
5519 best_cp = cp;
5523 iv_ca_set_cp (data, ivs, use, best_cp);
5526 /* Get cost for assignment IVS. */
5528 static comp_cost
5529 iv_ca_cost (struct iv_ca *ivs)
5531 /* This was a conditional expression but it triggered a bug in
5532 Sun C 5.5. */
5533 if (ivs->bad_uses)
5534 return infinite_cost;
5535 else
5536 return ivs->cost;
5539 /* Returns true if all dependences of CP are among invariants in IVS. */
5541 static bool
5542 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5544 unsigned i;
5545 bitmap_iterator bi;
5547 if (!cp->depends_on)
5548 return true;
5550 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5552 if (ivs->n_invariant_uses[i] == 0)
5553 return false;
5556 return true;
5559 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5560 it before NEXT_CHANGE. */
5562 static struct iv_ca_delta *
5563 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5564 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5566 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5568 change->use = use;
5569 change->old_cp = old_cp;
5570 change->new_cp = new_cp;
5571 change->next_change = next_change;
5573 return change;
5576 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5577 are rewritten. */
5579 static struct iv_ca_delta *
5580 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5582 struct iv_ca_delta *last;
5584 if (!l2)
5585 return l1;
5587 if (!l1)
5588 return l2;
5590 for (last = l1; last->next_change; last = last->next_change)
5591 continue;
5592 last->next_change = l2;
5594 return l1;
5597 /* Reverse the list of changes DELTA, forming the inverse to it. */
5599 static struct iv_ca_delta *
5600 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5602 struct iv_ca_delta *act, *next, *prev = NULL;
5603 struct cost_pair *tmp;
5605 for (act = delta; act; act = next)
5607 next = act->next_change;
5608 act->next_change = prev;
5609 prev = act;
5611 tmp = act->old_cp;
5612 act->old_cp = act->new_cp;
5613 act->new_cp = tmp;
5616 return prev;
5619 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5620 reverted instead. */
5622 static void
5623 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5624 struct iv_ca_delta *delta, bool forward)
5626 struct cost_pair *from, *to;
5627 struct iv_ca_delta *act;
5629 if (!forward)
5630 delta = iv_ca_delta_reverse (delta);
5632 for (act = delta; act; act = act->next_change)
5634 from = act->old_cp;
5635 to = act->new_cp;
5636 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5637 iv_ca_set_cp (data, ivs, act->use, to);
5640 if (!forward)
5641 iv_ca_delta_reverse (delta);
5644 /* Returns true if CAND is used in IVS. */
5646 static bool
5647 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5649 return ivs->n_cand_uses[cand->id] > 0;
5652 /* Returns number of induction variable candidates in the set IVS. */
5654 static unsigned
5655 iv_ca_n_cands (struct iv_ca *ivs)
5657 return ivs->n_cands;
5660 /* Free the list of changes DELTA. */
5662 static void
5663 iv_ca_delta_free (struct iv_ca_delta **delta)
5665 struct iv_ca_delta *act, *next;
5667 for (act = *delta; act; act = next)
5669 next = act->next_change;
5670 free (act);
5673 *delta = NULL;
5676 /* Allocates a new iv candidate assignment. */
5678 static struct iv_ca *
5679 iv_ca_new (struct ivopts_data *data)
5681 struct iv_ca *nw = XNEW (struct iv_ca);
5683 nw->upto = 0;
5684 nw->bad_uses = 0;
5685 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5686 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5687 nw->cands = BITMAP_ALLOC (NULL);
5688 nw->n_cands = 0;
5689 nw->n_regs = 0;
5690 nw->cand_use_cost = no_cost;
5691 nw->cand_cost = 0;
5692 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5693 nw->cost = no_cost;
5694 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5695 nw->num_used_inv_expr = 0;
5697 return nw;
5700 /* Free memory occupied by the set IVS. */
5702 static void
5703 iv_ca_free (struct iv_ca **ivs)
5705 free ((*ivs)->cand_for_use);
5706 free ((*ivs)->n_cand_uses);
5707 BITMAP_FREE ((*ivs)->cands);
5708 free ((*ivs)->n_invariant_uses);
5709 free ((*ivs)->used_inv_expr);
5710 free (*ivs);
5711 *ivs = NULL;
5714 /* Dumps IVS to FILE. */
5716 static void
5717 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5719 const char *pref = " invariants ";
5720 unsigned i;
5721 comp_cost cost = iv_ca_cost (ivs);
5723 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5724 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5725 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5726 bitmap_print (file, ivs->cands, " candidates: ","\n");
5728 for (i = 0; i < ivs->upto; i++)
5730 struct iv_use *use = iv_use (data, i);
5731 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5732 if (cp)
5733 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5734 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5735 else
5736 fprintf (file, " use:%d --> ??\n", use->id);
5739 for (i = 1; i <= data->max_inv_id; i++)
5740 if (ivs->n_invariant_uses[i])
5742 fprintf (file, "%s%d", pref, i);
5743 pref = ", ";
5745 fprintf (file, "\n\n");
5748 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5749 of the new set, and store the differences in DELTA. The number of induction
5750 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is
5751 true, the function tries to find a solution with minimal iv candidates. */
5753 static comp_cost
5754 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5755 struct iv_cand *cand, struct iv_ca_delta **delta,
5756 unsigned *n_ivs, bool min_ncand)
5758 unsigned i;
5759 comp_cost cost;
5760 struct iv_use *use;
5761 struct cost_pair *old_cp, *new_cp;
5763 *delta = NULL;
5764 for (i = 0; i < ivs->upto; i++)
5766 use = iv_use (data, i);
5767 old_cp = iv_ca_cand_for_use (ivs, use);
5769 if (old_cp
5770 && old_cp->cand == cand)
5771 continue;
5773 new_cp = get_use_iv_cost (data, use, cand);
5774 if (!new_cp)
5775 continue;
5777 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5778 continue;
5780 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5781 continue;
5783 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5786 iv_ca_delta_commit (data, ivs, *delta, true);
5787 cost = iv_ca_cost (ivs);
5788 if (n_ivs)
5789 *n_ivs = iv_ca_n_cands (ivs);
5790 iv_ca_delta_commit (data, ivs, *delta, false);
5792 return cost;
5795 /* Try narrowing set IVS by removing CAND. Return the cost of
5796 the new set and store the differences in DELTA. START is
5797 the candidate with which we start narrowing. */
5799 static comp_cost
5800 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5801 struct iv_cand *cand, struct iv_cand *start,
5802 struct iv_ca_delta **delta)
5804 unsigned i, ci;
5805 struct iv_use *use;
5806 struct cost_pair *old_cp, *new_cp, *cp;
5807 bitmap_iterator bi;
5808 struct iv_cand *cnd;
5809 comp_cost cost, best_cost, acost;
5811 *delta = NULL;
5812 for (i = 0; i < n_iv_uses (data); i++)
5814 use = iv_use (data, i);
5816 old_cp = iv_ca_cand_for_use (ivs, use);
5817 if (old_cp->cand != cand)
5818 continue;
5820 best_cost = iv_ca_cost (ivs);
5821 /* Start narrowing with START. */
5822 new_cp = get_use_iv_cost (data, use, start);
5824 if (data->consider_all_candidates)
5826 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5828 if (ci == cand->id || (start && ci == start->id))
5829 continue;
5831 cnd = iv_cand (data, ci);
5833 cp = get_use_iv_cost (data, use, cnd);
5834 if (!cp)
5835 continue;
5837 iv_ca_set_cp (data, ivs, use, cp);
5838 acost = iv_ca_cost (ivs);
5840 if (compare_costs (acost, best_cost) < 0)
5842 best_cost = acost;
5843 new_cp = cp;
5847 else
5849 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5851 if (ci == cand->id || (start && ci == start->id))
5852 continue;
5854 cnd = iv_cand (data, ci);
5856 cp = get_use_iv_cost (data, use, cnd);
5857 if (!cp)
5858 continue;
5860 iv_ca_set_cp (data, ivs, use, cp);
5861 acost = iv_ca_cost (ivs);
5863 if (compare_costs (acost, best_cost) < 0)
5865 best_cost = acost;
5866 new_cp = cp;
5870 /* Restore to old cp for use. */
5871 iv_ca_set_cp (data, ivs, use, old_cp);
5873 if (!new_cp)
5875 iv_ca_delta_free (delta);
5876 return infinite_cost;
5879 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5882 iv_ca_delta_commit (data, ivs, *delta, true);
5883 cost = iv_ca_cost (ivs);
5884 iv_ca_delta_commit (data, ivs, *delta, false);
5886 return cost;
5889 /* Try optimizing the set of candidates IVS by removing candidates other
5890 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5891 differences in DELTA. */
5893 static comp_cost
5894 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5895 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5897 bitmap_iterator bi;
5898 struct iv_ca_delta *act_delta, *best_delta;
5899 unsigned i;
5900 comp_cost best_cost, acost;
5901 struct iv_cand *cand;
5903 best_delta = NULL;
5904 best_cost = iv_ca_cost (ivs);
5906 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5908 cand = iv_cand (data, i);
5910 if (cand == except_cand)
5911 continue;
5913 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5915 if (compare_costs (acost, best_cost) < 0)
5917 best_cost = acost;
5918 iv_ca_delta_free (&best_delta);
5919 best_delta = act_delta;
5921 else
5922 iv_ca_delta_free (&act_delta);
5925 if (!best_delta)
5927 *delta = NULL;
5928 return best_cost;
5931 /* Recurse to possibly remove other unnecessary ivs. */
5932 iv_ca_delta_commit (data, ivs, best_delta, true);
5933 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5934 iv_ca_delta_commit (data, ivs, best_delta, false);
5935 *delta = iv_ca_delta_join (best_delta, *delta);
5936 return best_cost;
5939 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
5940 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5941 the corresponding cost_pair; otherwise just return BEST_CP. */
5943 static struct cost_pair*
5944 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5945 unsigned int cand_idx, struct iv_cand *old_cand,
5946 struct cost_pair *best_cp)
5948 struct iv_cand *cand;
5949 struct cost_pair *cp;
5951 gcc_assert (old_cand != NULL && best_cp != NULL);
5952 if (cand_idx == old_cand->id)
5953 return best_cp;
5955 cand = iv_cand (data, cand_idx);
5956 cp = get_use_iv_cost (data, use, cand);
5957 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5958 return cp;
5960 return best_cp;
5963 /* Try breaking a locally optimal fixed-point for IVS by replacing candidates
5964 that are used by more than one iv use. For each of those candidates,
5965 this function tries to represent the iv uses under that candidate using
5966 other candidates with lower local cost, then tries to prune the new set.
5967 If the new set has a lower cost, it returns the new cost after recording the
5968 candidate replacement in the list DELTA. */
5970 static comp_cost
5971 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5972 struct iv_ca_delta **delta)
5974 bitmap_iterator bi, bj;
5975 unsigned int i, j, k;
5976 struct iv_use *use;
5977 struct iv_cand *cand;
5978 comp_cost orig_cost, acost;
5979 struct iv_ca_delta *act_delta, *tmp_delta;
5980 struct cost_pair *old_cp, *best_cp = NULL;
5982 *delta = NULL;
5983 orig_cost = iv_ca_cost (ivs);
5985 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5987 if (ivs->n_cand_uses[i] == 1
5988 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5989 continue;
5991 cand = iv_cand (data, i);
5993 act_delta = NULL;
5994 /* Represent uses under current candidate using other ones with
5995 lower local cost. */
5996 for (j = 0; j < ivs->upto; j++)
5998 use = iv_use (data, j);
5999 old_cp = iv_ca_cand_for_use (ivs, use);
6001 if (old_cp->cand != cand)
6002 continue;
6004 best_cp = old_cp;
6005 if (data->consider_all_candidates)
6006 for (k = 0; k < n_iv_cands (data); k++)
6007 best_cp = cheaper_cost_with_cand (data, use, k,
6008 old_cp->cand, best_cp);
6009 else
6010 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6011 best_cp = cheaper_cost_with_cand (data, use, k,
6012 old_cp->cand, best_cp);
6014 if (best_cp == old_cp)
6015 continue;
6017 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6019 /* No need for further pruning. */
6020 if (!act_delta)
6021 continue;
6023 /* Prune the new candidate set. */
6024 iv_ca_delta_commit (data, ivs, act_delta, true);
6025 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6026 iv_ca_delta_commit (data, ivs, act_delta, false);
6027 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6029 if (compare_costs (acost, orig_cost) < 0)
6031 *delta = act_delta;
6032 return acost;
6034 else
6035 iv_ca_delta_free (&act_delta);
6038 return orig_cost;
6041 /* Tries to extend the set IVS in the best possible way in order
6042 to express the USE. If ORIGINALP is true, prefer candidates from
6043 the original set of IVs, otherwise favor important candidates not
6044 based on any memory object. */
6046 static bool
6047 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6048 struct iv_use *use, bool originalp)
6050 comp_cost best_cost, act_cost;
6051 unsigned i;
6052 bitmap_iterator bi;
6053 struct iv_cand *cand;
6054 struct iv_ca_delta *best_delta = NULL, *act_delta;
6055 struct cost_pair *cp;
6057 iv_ca_add_use (data, ivs, use);
6058 best_cost = iv_ca_cost (ivs);
6059 cp = iv_ca_cand_for_use (ivs, use);
6060 if (cp)
6062 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6063 iv_ca_set_no_cp (data, ivs, use);
6066 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6067 first try important candidates not based on any memory object. Only if
6068 this fails, try the specific ones. Rationale -- in loops with many
6069 variables the best choice often is to use just one generic biv. If we
6070 added here many ivs specific to the uses, the optimization algorithm later
6071 would be likely to get stuck in a local minimum, thus causing us to create
6072 too many ivs. The approach from few ivs to more seems more likely to be
6073 successful -- starting from few ivs, replacing an expensive use by a
6074 specific iv should always be a win. */
6075 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6077 cand = iv_cand (data, i);
6079 if (originalp && cand->pos != IP_ORIGINAL)
6080 continue;
6082 if (!originalp && cand->iv->base_object != NULL_TREE)
6083 continue;
6085 if (iv_ca_cand_used_p (ivs, cand))
6086 continue;
6088 cp = get_use_iv_cost (data, use, cand);
6089 if (!cp)
6090 continue;
6092 iv_ca_set_cp (data, ivs, use, cp);
6093 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6094 true);
6095 iv_ca_set_no_cp (data, ivs, use);
6096 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6098 if (compare_costs (act_cost, best_cost) < 0)
6100 best_cost = act_cost;
6102 iv_ca_delta_free (&best_delta);
6103 best_delta = act_delta;
6105 else
6106 iv_ca_delta_free (&act_delta);
6109 if (infinite_cost_p (best_cost))
6111 for (i = 0; i < use->n_map_members; i++)
6113 cp = use->cost_map + i;
6114 cand = cp->cand;
6115 if (!cand)
6116 continue;
6118 /* Already tried this. */
6119 if (cand->important)
6121 if (originalp && cand->pos == IP_ORIGINAL)
6122 continue;
6123 if (!originalp && cand->iv->base_object == NULL_TREE)
6124 continue;
6127 if (iv_ca_cand_used_p (ivs, cand))
6128 continue;
6130 act_delta = NULL;
6131 iv_ca_set_cp (data, ivs, use, cp);
6132 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6133 iv_ca_set_no_cp (data, ivs, use);
6134 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6135 cp, act_delta);
6137 if (compare_costs (act_cost, best_cost) < 0)
6139 best_cost = act_cost;
6141 if (best_delta)
6142 iv_ca_delta_free (&best_delta);
6143 best_delta = act_delta;
6145 else
6146 iv_ca_delta_free (&act_delta);
6150 iv_ca_delta_commit (data, ivs, best_delta, true);
6151 iv_ca_delta_free (&best_delta);
6153 return !infinite_cost_p (best_cost);
6156 /* Finds an initial assignment of candidates to uses. */
6158 static struct iv_ca *
6159 get_initial_solution (struct ivopts_data *data, bool originalp)
6161 struct iv_ca *ivs = iv_ca_new (data);
6162 unsigned i;
6164 for (i = 0; i < n_iv_uses (data); i++)
6165 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6167 iv_ca_free (&ivs);
6168 return NULL;
6171 return ivs;
6174 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6175 points to a bool variable; if it is true, this function tries to break
6176 a locally optimal fixed-point by replacing candidates in IVS. */
6178 static bool
6179 try_improve_iv_set (struct ivopts_data *data,
6180 struct iv_ca *ivs, bool *try_replace_p)
6182 unsigned i, n_ivs;
6183 comp_cost acost, best_cost = iv_ca_cost (ivs);
6184 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6185 struct iv_cand *cand;
6187 /* Try extending the set of induction variables by one. */
6188 for (i = 0; i < n_iv_cands (data); i++)
6190 cand = iv_cand (data, i);
6192 if (iv_ca_cand_used_p (ivs, cand))
6193 continue;
6195 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6196 if (!act_delta)
6197 continue;
6199 /* If we successfully added the candidate and the set is small enough,
6200 try optimizing it by removing other candidates. */
6201 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6203 iv_ca_delta_commit (data, ivs, act_delta, true);
6204 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6205 iv_ca_delta_commit (data, ivs, act_delta, false);
6206 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6209 if (compare_costs (acost, best_cost) < 0)
6211 best_cost = acost;
6212 iv_ca_delta_free (&best_delta);
6213 best_delta = act_delta;
6215 else
6216 iv_ca_delta_free (&act_delta);
6219 if (!best_delta)
6221 /* Try removing the candidates from the set instead. */
6222 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6224 if (!best_delta && *try_replace_p)
6226 *try_replace_p = false;
6227 /* So far the candidate-selection algorithm tends to choose fewer IVs
6228 so that it can handle cases in which loops have many variables
6229 but the best choice is often to use only one general biv. One
6230 weakness is that it can't handle the opposite cases, in which different
6231 candidates should be chosen with respect to each use. To solve
6232 the problem, we replace candidates in the manner described in the
6233 comments of iv_ca_replace, thus giving the general algorithm a chance
6234 to break the locally optimal fixed-point in these cases. */
6235 best_cost = iv_ca_replace (data, ivs, &best_delta);
6238 if (!best_delta)
6239 return false;
6242 iv_ca_delta_commit (data, ivs, best_delta, true);
6243 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6244 iv_ca_delta_free (&best_delta);
6245 return true;
/* Attempts to find an optimal set of induction variables.  We use a
   simple greedy heuristic: starting from an initial solution, we try to
   replace at most one candidate in the selected set and to remove unused
   ivs, for as long as this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
/* Finds the optimal set of induction variables.  Tries both a strategy
   that starts from the original IVs and one that prefers candidates not
   based on any existing IV, and keeps the cheaper result.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost using a strategy that starts with the original IVs,
     then try again with a strategy that prefers candidates not based on
     any existing IV.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
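/* Illustrative note (not part of the pass): compare_costs orders comp_cost
   values by their COST field and falls back to COMPLEXITY only to break
   ties; e.g. cost 7 (complexity 2) is preferred over cost 7 (complexity 3),
   while cost 6 (complexity 5) beats both.  The "<= 0" above keeps the
   original-IV solution on an exact tie.  */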
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
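/* Illustrative sketch (SSA names invented): for a candidate with base 0,
   step 4 and position IP_NORMAL, the create_iv call above ends up
   emitting roughly

     # ivtmp.5_1 = PHI <0(preheader), ivtmp.5_2(latch)>
     ...
     ivtmp.5_2 = ivtmp.5_1 + 4;

   and stores the names back into cand->var_before (ivtmp.5_1) and
   cand->var_after (ivtmp.5_2).  */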
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
               data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
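/* Illustrative example (names invented): a nonlinear use such as

     x_3 = i_1 * 2;

   expressed through an unsigned candidate ivtmp.6 with the same evolution
   is rewritten to compute the same value from the candidate, roughly

     x_3 = (int) ivtmp.6_7 * 2;

   while a GIMPLE_PHI use is instead replaced by an assignment inserted
   after the labels of its block, as done above.  */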
/* Performs a peephole optimization, reordering the iv update statement
   with a mem ref that uses it, to enable instruction combining in later
   phases.  The mem ref uses the iv value before the update, so the
   reordering requires adjustment of the offset.  CAND is the selected
   IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole instead
   transforms the code into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
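/* Illustrative example (names invented): an address use *p_5 whose affine
   form works out to  &a + 4 * ivtmp.7  may be rewritten by create_mem_ref
   into a target memory reference along the lines of

     MEM[symbol: a, index: ivtmp.7_3, step: 4]

   with BASE_HINT marking the candidate as object-based when it is.  */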
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
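/* Illustrative example (names invented): when elimination succeeds, an
   exit test such as

     if (i_1 < n_4)

   is retargeted at the candidate and the precomputed bound, e.g.

     if (ivtmp.5_8 != _9)

   where _9 is the bound forced into the loop preheader above; only when
   elimination fails is the original value recomputed explicitly.  */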
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gdebug *def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
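/* Illustrative example (names invented): if a removed biv i_1 is
   referenced by several debug binds, the computed replacement is emitted
   once through a debug temp rather than duplicated at every bind:

     # DEBUG D#1 => (int) ivtmp.5_8
     # DEBUG i => D#1
*/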
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}
/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   includes any function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
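/* Illustrative summary of the pass (source level, simplified): on a
   target with cheap base+index addressing, a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   may end up using a single pointer-like induction variable,

     for (p = a; p != a + n; p++)
       sum += *p;

   with the multiplication by the element size strength-reduced, the
   original counter eliminated, and the exit test rewritten against the
   new iv.  */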