gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "hash-set.h"
  69 #include "machmode.h"
  70 #include "vec.h"
  71 #include "double-int.h"
  72 #include "input.h"
  73 #include "alias.h"
  74 #include "symtab.h"
  75 #include "wide-int.h"
  76 #include "inchash.h"
  77 #include "tree.h"
  78 #include "fold-const.h"
  79 #include "stor-layout.h"
  80 #include "tm_p.h"
  81 #include "predict.h"
  82 #include "hard-reg-set.h"
  83 #include "function.h"
  84 #include "dominance.h"
  85 #include "cfg.h"
  86 #include "basic-block.h"
  87 #include "gimple-pretty-print.h"
  88 #include "hash-map.h"
  89 #include "hash-table.h"
  90 #include "tree-ssa-alias.h"
  91 #include "internal-fn.h"
  92 #include "tree-eh.h"
  93 #include "gimple-expr.h"
  94 #include "is-a.h"
  95 #include "gimple.h"
  96 #include "gimplify.h"
  97 #include "gimple-iterator.h"
  98 #include "gimplify-me.h"
  99 #include "gimple-ssa.h"
 100 #include "plugin-api.h"
 101 #include "ipa-ref.h"
 102 #include "cgraph.h"
 103 #include "tree-cfg.h"
 104 #include "tree-phinodes.h"
 105 #include "ssa-iterators.h"
 106 #include "stringpool.h"
 107 #include "tree-ssanames.h"
 108 #include "tree-ssa-loop-ivopts.h"
 109 #include "tree-ssa-loop-manip.h"
 110 #include "tree-ssa-loop-niter.h"
 111 #include "tree-ssa-loop.h"
 112 #include "hashtab.h"
 113 #include "rtl.h"
 114 #include "flags.h"
 115 #include "statistics.h"
 116 #include "real.h"
 117 #include "fixed-value.h"
 118 #include "insn-config.h"
 119 #include "expmed.h"
 120 #include "dojump.h"
 121 #include "explow.h"
 122 #include "calls.h"
 123 #include "emit-rtl.h"
 124 #include "varasm.h"
 125 #include "stmt.h"
 126 #include "expr.h"
 127 #include "tree-dfa.h"
 128 #include "tree-ssa.h"
 129 #include "cfgloop.h"
 130 #include "tree-pass.h"
 131 #include "tree-chrec.h"
 132 #include "tree-scalar-evolution.h"
 133 #include "params.h"
 134 #include "langhooks.h"
 135 #include "tree-affine.h"
 136 #include "target.h"
 137 #include "tree-inline.h"
 138 #include "tree-ssa-propagate.h"
 139 #include "tree-ssa-address.h"
 140 #include "builtins.h"
 141 #include "tree-vectorizer.h"
 142
 143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 144    cost of different addressing modes.  This should be moved to a TBD
 145    interface between the GIMPLE and RTL worlds.  */
 146 #include "recog.h"
 147
 148 /* The infinite cost.  */
 149 #define INFTY 10000000
 150
 151 #define AVG_LOOP_NITER(LOOP) 5
 152
 153 /* Returns the expected number of loop iterations for LOOP.
 154    The average trip count is computed from profile data if it
 155    exists. */
 156
 157 static inline HOST_WIDE_INT
 158 avg_loop_niter (struct loop *loop)
 159 {
 160   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 161   if (niter == -1)
 162     return AVG_LOOP_NITER (loop);
 163
 164   return niter;
 165 }
 166
 167 /* Representation of the induction variable.  */
 168 struct iv
 169 {
 170   tree base;            /* Initial value of the iv.  */
 171   tree base_object;     /* A memory object to that the induction variable points.  */
 172   tree step;            /* Step of the iv (constant only).  */
 173   tree ssa_name;        /* The ssa name with the value.  */
 174   bool biv_p;           /* Is it a biv?  */
 175   bool have_use_for;    /* Do we already have a use for it?  */
 176   unsigned use_id;      /* The identifier in the use if it is the case.  */
 177 };
 178
 179 /* Per-ssa version information (induction variable descriptions, etc.).  */
 180 struct version_info
 181 {
 182   tree name;            /* The ssa name.  */
 183   struct iv *iv;        /* Induction variable description.  */
 184   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 185                            an expression that is not an induction variable.  */
 186   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 187   unsigned inv_id;      /* Id of an invariant.  */
 188 };
 189
 190 /* Types of uses.  */
 191 enum use_type
 192 {
 193   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 194   USE_ADDRESS,          /* Use in an address.  */
 195   USE_COMPARE           /* Use is a compare.  */
 196 };
 197
 198 /* Cost of a computation.  */
 199 typedef struct
 200 {
 201   int cost;             /* The runtime cost.  */
 202   unsigned complexity;  /* The estimate of the complexity of the code for
 203                            the computation (in no concrete units --
 204                            complexity field should be larger for more
 205                            complex expressions and addressing modes).  */
 206 } comp_cost;
 207
 208 static const comp_cost no_cost = {0, 0};
 209 static const comp_cost infinite_cost = {INFTY, INFTY};
 210
 211 /* The candidate - cost pair.  */
 212 struct cost_pair
 213 {
 214   struct iv_cand *cand; /* The candidate.  */
 215   comp_cost cost;       /* The cost.  */
 216   bitmap depends_on;    /* The list of invariants that have to be
 217                            preserved.  */
 218   tree value;           /* For final value elimination, the expression for
 219                            the final value of the iv.  For iv elimination,
 220                            the new bound to compare with.  */
 221   enum tree_code comp;  /* For iv elimination, the comparison.  */
 222   int inv_expr_id;      /* Loop invariant expression id.  */
 223 };
 224
 225 /* Use.  */
 226 struct iv_use
 227 {
 228   unsigned id;          /* The id of the use.  */
 229   enum use_type type;   /* Type of the use.  */
 230   struct iv *iv;        /* The induction variable it is based on.  */
 231   gimple stmt;          /* Statement in that it occurs.  */
 232   tree *op_p;           /* The place where it occurs.  */
 233   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 234                            important ones.  */
 235
 236   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 237   struct cost_pair *cost_map;
 238                         /* The costs wrto the iv candidates.  */
 239
 240   struct iv_cand *selected;
 241                         /* The selected candidate.  */
 242 };
 243
 244 /* The position where the iv is computed.  */
 245 enum iv_position
 246 {
 247   IP_NORMAL,            /* At the end, just before the exit condition.  */
 248   IP_END,               /* At the end of the latch block.  */
 249   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 250   IP_AFTER_USE,         /* Immediately after a specific use.  */
 251   IP_ORIGINAL           /* The original biv.  */
 252 };
 253
 254 /* The induction variable candidate.  */
 255 struct iv_cand
 256 {
 257   unsigned id;          /* The number of the candidate.  */
 258   bool important;       /* Whether this is an "important" candidate, i.e. such
 259                            that it should be considered by all uses.  */
 260   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 261   gimple incremented_at;/* For original biv, the statement where it is
 262                            incremented.  */
 263   tree var_before;      /* The variable used for it before increment.  */
 264   tree var_after;       /* The variable used for it after increment.  */
 265   struct iv *iv;        /* The value of the candidate.  NULL for
 266                            "pseudocandidate" used to indicate the possibility
 267                            to replace the final value of an iv by direct
 268                            computation of the value.  */
 269   unsigned cost;        /* Cost of the candidate.  */
 270   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 271   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 272                               where it is incremented.  */
 273   bitmap depends_on;    /* The list of invariants that are used in step of the
 274                            biv.  */
 275 };
 276
 277 /* Loop invariant expression hashtable entry.  */
 278 struct iv_inv_expr_ent
 279 {
 280   tree expr;
 281   int id;
 282   hashval_t hash;
 283 };
 284
 285 /* The data used by the induction variable optimizations.  */
 286
 287 typedef struct iv_use *iv_use_p;
 288
 289 typedef struct iv_cand *iv_cand_p;
 290
 291 /* Hashtable helpers.  */
 292
 293 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 294 {
 295   typedef iv_inv_expr_ent *value_type;
 296   typedef iv_inv_expr_ent *compare_type;
 297   static inline hashval_t hash (const iv_inv_expr_ent *);
 298   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 299 };
 300
 301 /* Hash function for loop invariant expressions.  */
 302
 303 inline hashval_t
 304 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 305 {
 306   return expr->hash;
 307 }
 308
 309 /* Hash table equality function for expressions.  */
 310
 311 inline bool
 312 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 313                            const iv_inv_expr_ent *expr2)
 314 {
 315   return expr1->hash == expr2->hash
 316          && operand_equal_p (expr1->expr, expr2->expr, 0);
 317 }
 318
 319 struct ivopts_data
 320 {
 321   /* The currently optimized loop.  */
 322   struct loop *current_loop;
 323   source_location loop_loc;
 324
 325   /* Numbers of iterations for all exits of the current loop.  */
 326   hash_map<edge, tree_niter_desc *> *niters;
 327
 328   /* Number of registers used in it.  */
 329   unsigned regs_used;
 330
 331   /* The size of version_info array allocated.  */
 332   unsigned version_info_size;
 333
 334   /* The array of information for the ssa names.  */
 335   struct version_info *version_info;
 336
 337   /* The hashtable of loop invariant expressions created
 338      by ivopt.  */
 339   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 340
 341   /* Loop invariant expression id.  */
 342   int inv_expr_id;
 343
 344   /* The bitmap of indices in version_info whose value was changed.  */
 345   bitmap relevant;
 346
 347   /* The uses of induction variables.  */
 348   vec<iv_use_p> iv_uses;
 349
 350   /* The candidates.  */
 351   vec<iv_cand_p> iv_candidates;
 352
 353   /* A bitmap of important candidates.  */
 354   bitmap important_candidates;
 355
 356   /* Cache used by tree_to_aff_combination_expand.  */
 357   hash_map<tree, name_expansion *> *name_expansion_cache;
 358
 359   /* The maximum invariant id.  */
 360   unsigned max_inv_id;
 361
 362   /* Whether to consider just related and important candidates when replacing a
 363      use.  */
 364   bool consider_all_candidates;
 365
 366   /* Are we optimizing for speed?  */
 367   bool speed;
 368
 369   /* Whether the loop body includes any function calls.  */
 370   bool body_includes_call;
 371
 372   /* Whether the loop body can only be exited via single exit.  */
 373   bool loop_single_exit_p;
 374 };
 375
 376 /* An assignment of iv candidates to uses.  */
 377
 378 struct iv_ca
 379 {
 380   /* The number of uses covered by the assignment.  */
 381   unsigned upto;
 382
 383   /* Number of uses that cannot be expressed by the candidates in the set.  */
 384   unsigned bad_uses;
 385
 386   /* Candidate assigned to a use, together with the related costs.  */
 387   struct cost_pair **cand_for_use;
 388
 389   /* Number of times each candidate is used.  */
 390   unsigned *n_cand_uses;
 391
 392   /* The candidates used.  */
 393   bitmap cands;
 394
 395   /* The number of candidates in the set.  */
 396   unsigned n_cands;
 397
 398   /* Total number of registers needed.  */
 399   unsigned n_regs;
 400
 401   /* Total cost of expressing uses.  */
 402   comp_cost cand_use_cost;
 403
 404   /* Total cost of candidates.  */
 405   unsigned cand_cost;
 406
 407   /* Number of times each invariant is used.  */
 408   unsigned *n_invariant_uses;
 409
 410   /* The array holding the number of uses of each loop
 411      invariant expressions created by ivopt.  */
 412   unsigned *used_inv_expr;
 413
 414   /* The number of created loop invariants.  */
 415   unsigned num_used_inv_expr;
 416
 417   /* Total cost of the assignment.  */
 418   comp_cost cost;
 419 };
 420
 421 /* Difference of two iv candidate assignments.  */
 422
 423 struct iv_ca_delta
 424 {
 425   /* Changed use.  */
 426   struct iv_use *use;
 427
 428   /* An old assignment (for rollback purposes).  */
 429   struct cost_pair *old_cp;
 430
 431   /* A new assignment.  */
 432   struct cost_pair *new_cp;
 433
 434   /* Next change in the list.  */
 435   struct iv_ca_delta *next_change;
 436 };
 437
 438 /* Bound on number of candidates below that all candidates are considered.  */
 439
 440 #define CONSIDER_ALL_CANDIDATES_BOUND \
 441   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 442
 443 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 444    optimizing such a loop would help, and it would take ages).  */
 445
 446 #define MAX_CONSIDERED_USES \
 447   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 448
 449 /* If there are at most this number of ivs in the set, try removing unnecessary
 450    ivs from the set always.  */
 451
 452 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 453   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 454
 455 /* The list of trees for that the decl_rtl field must be reset is stored
 456    here.  */
 457
 458 static vec<tree> decl_rtl_to_reset;
 459
 460 static comp_cost force_expr_to_var_cost (tree, bool);
 461
 462 /* Number of uses recorded in DATA.  */
 463
 464 static inline unsigned
 465 n_iv_uses (struct ivopts_data *data)
 466 {
 467   return data->iv_uses.length ();
 468 }
 469
 470 /* Ith use recorded in DATA.  */
 471
 472 static inline struct iv_use *
 473 iv_use (struct ivopts_data *data, unsigned i)
 474 {
 475   return data->iv_uses[i];
 476 }
 477
 478 /* Number of candidates recorded in DATA.  */
 479
 480 static inline unsigned
 481 n_iv_cands (struct ivopts_data *data)
 482 {
 483   return data->iv_candidates.length ();
 484 }
 485
 486 /* Ith candidate recorded in DATA.  */
 487
 488 static inline struct iv_cand *
 489 iv_cand (struct ivopts_data *data, unsigned i)
 490 {
 491   return data->iv_candidates[i];
 492 }
 493
 494 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 495
 496 edge
 497 single_dom_exit (struct loop *loop)
 498 {
 499   edge exit = single_exit (loop);
 500
 501   if (!exit)
 502     return NULL;
 503
 504   if (!just_once_each_iteration_p (loop, exit->src))
 505     return NULL;
 506
 507   return exit;
 508 }
 509
 510 /* Dumps information about the induction variable IV to FILE.  */
 511
 512 void
 513 dump_iv (FILE *file, struct iv *iv)
 514 {
 515   if (iv->ssa_name)
 516     {
 517       fprintf (file, "ssa name ");
 518       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 519       fprintf (file, "\n");
 520     }
 521
 522   fprintf (file, "  type ");
 523   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 524   fprintf (file, "\n");
 525
 526   if (iv->step)
 527     {
 528       fprintf (file, "  base ");
 529       print_generic_expr (file, iv->base, TDF_SLIM);
 530       fprintf (file, "\n");
 531
 532       fprintf (file, "  step ");
 533       print_generic_expr (file, iv->step, TDF_SLIM);
 534       fprintf (file, "\n");
 535     }
 536   else
 537     {
 538       fprintf (file, "  invariant ");
 539       print_generic_expr (file, iv->base, TDF_SLIM);
 540       fprintf (file, "\n");
 541     }
 542
 543   if (iv->base_object)
 544     {
 545       fprintf (file, "  base object ");
 546       print_generic_expr (file, iv->base_object, TDF_SLIM);
 547       fprintf (file, "\n");
 548     }
 549
 550   if (iv->biv_p)
 551     fprintf (file, "  is a biv\n");
 552 }
 553
 554 /* Dumps information about the USE to FILE.  */
 555
 556 void
 557 dump_use (FILE *file, struct iv_use *use)
 558 {
 559   fprintf (file, "use %d\n", use->id);
 560
 561   switch (use->type)
 562     {
 563     case USE_NONLINEAR_EXPR:
 564       fprintf (file, "  generic\n");
 565       break;
 566
 567     case USE_ADDRESS:
 568       fprintf (file, "  address\n");
 569       break;
 570
 571     case USE_COMPARE:
 572       fprintf (file, "  compare\n");
 573       break;
 574
 575     default:
 576       gcc_unreachable ();
 577     }
 578
 579   fprintf (file, "  in statement ");
 580   print_gimple_stmt (file, use->stmt, 0, 0);
 581   fprintf (file, "\n");
 582
 583   fprintf (file, "  at position ");
 584   if (use->op_p)
 585     print_generic_expr (file, *use->op_p, TDF_SLIM);
 586   fprintf (file, "\n");
 587
 588   dump_iv (file, use->iv);
 589
 590   if (use->related_cands)
 591     {
 592       fprintf (file, "  related candidates ");
 593       dump_bitmap (file, use->related_cands);
 594     }
 595 }
 596
 597 /* Dumps information about the uses to FILE.  */
 598
 599 void
 600 dump_uses (FILE *file, struct ivopts_data *data)
 601 {
 602   unsigned i;
 603   struct iv_use *use;
 604
 605   for (i = 0; i < n_iv_uses (data); i++)
 606     {
 607       use = iv_use (data, i);
 608
 609       dump_use (file, use);
 610       fprintf (file, "\n");
 611     }
 612 }
 613
 614 /* Dumps information about induction variable candidate CAND to FILE.  */
 615
 616 void
 617 dump_cand (FILE *file, struct iv_cand *cand)
 618 {
 619   struct iv *iv = cand->iv;
 620
 621   fprintf (file, "candidate %d%s\n",
 622            cand->id, cand->important ? " (important)" : "");
 623
 624   if (cand->depends_on)
 625     {
 626       fprintf (file, "  depends on ");
 627       dump_bitmap (file, cand->depends_on);
 628     }
 629
 630   if (!iv)
 631     {
 632       fprintf (file, "  final value replacement\n");
 633       return;
 634     }
 635
 636   if (cand->var_before)
 637     {
 638       fprintf (file, "  var_before ");
 639       print_generic_expr (file, cand->var_before, TDF_SLIM);
 640       fprintf (file, "\n");
 641     }
 642   if (cand->var_after)
 643     {
 644       fprintf (file, "  var_after ");
 645       print_generic_expr (file, cand->var_after, TDF_SLIM);
 646       fprintf (file, "\n");
 647     }
 648
 649   switch (cand->pos)
 650     {
 651     case IP_NORMAL:
 652       fprintf (file, "  incremented before exit test\n");
 653       break;
 654
 655     case IP_BEFORE_USE:
 656       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 657       break;
 658
 659     case IP_AFTER_USE:
 660       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 661       break;
 662
 663     case IP_END:
 664       fprintf (file, "  incremented at end\n");
 665       break;
 666
 667     case IP_ORIGINAL:
 668       fprintf (file, "  original biv\n");
 669       break;
 670     }
 671
 672   dump_iv (file, iv);
 673 }
 674
 675 /* Returns the info for ssa version VER.  */
 676
 677 static inline struct version_info *
 678 ver_info (struct ivopts_data *data, unsigned ver)
 679 {
 680   return data->version_info + ver;
 681 }
 682
 683 /* Returns the info for ssa name NAME.  */
 684
 685 static inline struct version_info *
 686 name_info (struct ivopts_data *data, tree name)
 687 {
 688   return ver_info (data, SSA_NAME_VERSION (name));
 689 }
 690
 691 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 692    emitted in LOOP.  */
 693
 694 static bool
 695 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 696 {
 697   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 698
 699   gcc_assert (bb);
 700
 701   if (sbb == loop->latch)
 702     return true;
 703
 704   if (sbb != bb)
 705     return false;
 706
 707   return stmt == last_stmt (bb);
 708 }
 709
 710 /* Returns true if STMT if after the place where the original induction
 711    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 712    if the positions are identical.  */
 713
 714 static bool
 715 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 716 {
 717   basic_block cand_bb = gimple_bb (cand->incremented_at);
 718   basic_block stmt_bb = gimple_bb (stmt);
 719
 720   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 721     return false;
 722
 723   if (stmt_bb != cand_bb)
 724     return true;
 725
 726   if (true_if_equal
 727       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 728     return true;
 729   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 730 }
 731
 732 /* Returns true if STMT if after the place where the induction variable
 733    CAND is incremented in LOOP.  */
 734
 735 static bool
 736 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 737 {
 738   switch (cand->pos)
 739     {
 740     case IP_END:
 741       return false;
 742
 743     case IP_NORMAL:
 744       return stmt_after_ip_normal_pos (loop, stmt);
 745
 746     case IP_ORIGINAL:
 747     case IP_AFTER_USE:
 748       return stmt_after_inc_pos (cand, stmt, false);
 749
 750     case IP_BEFORE_USE:
 751       return stmt_after_inc_pos (cand, stmt, true);
 752
 753     default:
 754       gcc_unreachable ();
 755     }
 756 }
 757
 758 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 759
 760 static bool
 761 abnormal_ssa_name_p (tree exp)
 762 {
 763   if (!exp)
 764     return false;
 765
 766   if (TREE_CODE (exp) != SSA_NAME)
 767     return false;
 768
 769   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 770 }
 771
 772 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 773    abnormal phi node.  Callback for for_each_index.  */
 774
 775 static bool
 776 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 777                                   void *data ATTRIBUTE_UNUSED)
 778 {
 779   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 780     {
 781       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 782         return false;
 783       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 784         return false;
 785     }
 786
 787   return !abnormal_ssa_name_p (*index);
 788 }
 789
 790 /* Returns true if EXPR contains a ssa name that occurs in an
 791    abnormal phi node.  */
 792
 793 bool
 794 contains_abnormal_ssa_name_p (tree expr)
 795 {
 796   enum tree_code code;
 797   enum tree_code_class codeclass;
 798
 799   if (!expr)
 800     return false;
 801
 802   code = TREE_CODE (expr);
 803   codeclass = TREE_CODE_CLASS (code);
 804
 805   if (code == SSA_NAME)
 806     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 807
 808   if (code == INTEGER_CST
 809       || is_gimple_min_invariant (expr))
 810     return false;
 811
 812   if (code == ADDR_EXPR)
 813     return !for_each_index (&TREE_OPERAND (expr, 0),
 814                             idx_contains_abnormal_ssa_name_p,
 815                             NULL);
 816
 817   if (code == COND_EXPR)
 818     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 819       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 820       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 821
 822   switch (codeclass)
 823     {
 824     case tcc_binary:
 825     case tcc_comparison:
 826       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 827         return true;
 828
 829       /* Fallthru.  */
 830     case tcc_unary:
 831       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 832         return true;
 833
 834       break;
 835
 836     default:
 837       gcc_unreachable ();
 838     }
 839
 840   return false;
 841 }
 842
 843 /*  Returns the structure describing number of iterations determined from
 844     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 845
 846 static struct tree_niter_desc *
 847 niter_for_exit (struct ivopts_data *data, edge exit)
 848 {
 849   struct tree_niter_desc *desc;
 850   tree_niter_desc **slot;
 851
 852   if (!data->niters)
 853     {
 854       data->niters = new hash_map<edge, tree_niter_desc *>;
 855       slot = NULL;
 856     }
 857   else
 858     slot = data->niters->get (exit);
 859
 860   if (!slot)
 861     {
 862       /* Try to determine number of iterations.  We cannot safely work with ssa
 863          names that appear in phi nodes on abnormal edges, so that we do not
 864          create overlapping life ranges for them (PR 27283).  */
 865       desc = XNEW (struct tree_niter_desc);
 866       if (!number_of_iterations_exit (data->current_loop,
 867                                       exit, desc, true)
 868           || contains_abnormal_ssa_name_p (desc->niter))
 869         {
 870           XDELETE (desc);
 871           desc = NULL;
 872         }
 873       data->niters->put (exit, desc);
 874     }
 875   else
 876     desc = *slot;
 877
 878   return desc;
 879 }
 880
 881 /* Returns the structure describing number of iterations determined from
 882    single dominating exit of DATA->current_loop, or NULL if something
 883    goes wrong.  */
 884
 885 static struct tree_niter_desc *
 886 niter_for_single_dom_exit (struct ivopts_data *data)
 887 {
 888   edge exit = single_dom_exit (data->current_loop);
 889
 890   if (!exit)
 891     return NULL;
 892
 893   return niter_for_exit (data, exit);
 894 }
 895
 896 /* Initializes data structures used by the iv optimization pass, stored
 897    in DATA.  */
 898
 899 static void
 900 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 901 {
 902   data->version_info_size = 2 * num_ssa_names;
 903   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 904   data->relevant = BITMAP_ALLOC (NULL);
 905   data->important_candidates = BITMAP_ALLOC (NULL);
 906   data->max_inv_id = 0;
 907   data->niters = NULL;
 908   data->iv_uses.create (20);
 909   data->iv_candidates.create (20);
 910   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 911   data->inv_expr_id = 0;
 912   data->name_expansion_cache = NULL;
 913   decl_rtl_to_reset.create (20);
 914 }
 915
 916 /* Returns a memory object to that EXPR points.  In case we are able to
 917    determine that it does not point to any such object, NULL is returned.  */
 918
 919 static tree
 920 determine_base_object (tree expr)
 921 {
 922   enum tree_code code = TREE_CODE (expr);
 923   tree base, obj;
 924
 925   /* If this is a pointer casted to any type, we need to determine
 926      the base object for the pointer; so handle conversions before
 927      throwing away non-pointer expressions.  */
 928   if (CONVERT_EXPR_P (expr))
 929     return determine_base_object (TREE_OPERAND (expr, 0));
 930
 931   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 932     return NULL_TREE;
 933
 934   switch (code)
 935     {
 936     case INTEGER_CST:
 937       return NULL_TREE;
 938
 939     case ADDR_EXPR:
 940       obj = TREE_OPERAND (expr, 0);
 941       base = get_base_address (obj);
 942
 943       if (!base)
 944         return expr;
 945
 946       if (TREE_CODE (base) == MEM_REF)
 947         return determine_base_object (TREE_OPERAND (base, 0));
 948
 949       return fold_convert (ptr_type_node,
 950                            build_fold_addr_expr (base));
 951
 952     case POINTER_PLUS_EXPR:
 953       return determine_base_object (TREE_OPERAND (expr, 0));
 954
 955     case PLUS_EXPR:
 956     case MINUS_EXPR:
 957       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 958       gcc_unreachable ();
 959
 960     default:
 961       return fold_convert (ptr_type_node, expr);
 962     }
 963 }
 964
 965 /* Return true if address expression with non-DECL_P operand appears
 966    in EXPR.  */
 967
 968 static bool
 969 contain_complex_addr_expr (tree expr)
 970 {
 971   bool res = false;
 972
 973   STRIP_NOPS (expr);
 974   switch (TREE_CODE (expr))
 975     {
 976     case POINTER_PLUS_EXPR:
 977     case PLUS_EXPR:
 978     case MINUS_EXPR:
 979       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 980       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 981       break;
 982
 983     case ADDR_EXPR:
 984       return (!DECL_P (TREE_OPERAND (expr, 0)));
 985
 986     default:
 987       return false;
 988     }
 989
 990   return res;
 991 }
 992
 993 /* Allocates an induction variable with given initial value BASE and step STEP
 994    for loop LOOP.  */
 995
 996 static struct iv *
 997 alloc_iv (tree base, tree step)
 998 {
 999   tree expr = base;
1000   struct iv *iv = XCNEW (struct iv);
1001   gcc_assert (step != NULL_TREE);
1002
1003   /* Lower address expression in base except ones with DECL_P as operand.
1004      By doing this:
1005        1) More accurate cost can be computed for address expressions;
1006        2) Duplicate candidates won't be created for bases in different
1007           forms, like &a[0] and &a.  */
1008   STRIP_NOPS (expr);
1009   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1010       || contain_complex_addr_expr (expr))
1011     {
1012       aff_tree comb;
1013       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1014       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1015     }
1016
1017   iv->base = base;
1018   iv->base_object = determine_base_object (base);
1019   iv->step = step;
1020   iv->biv_p = false;
1021   iv->have_use_for = false;
1022   iv->use_id = 0;
1023   iv->ssa_name = NULL_TREE;
1024
1025   return iv;
1026 }
1027
1028 /* Sets STEP and BASE for induction variable IV.  */
1029
1030 static void
1031 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1032 {
1033   struct version_info *info = name_info (data, iv);
1034
1035   gcc_assert (!info->iv);
1036
1037   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1038   info->iv = alloc_iv (base, step);
1039   info->iv->ssa_name = iv;
1040 }
1041
1042 /* Finds induction variable declaration for VAR.  */
1043
1044 static struct iv *
1045 get_iv (struct ivopts_data *data, tree var)
1046 {
1047   basic_block bb;
1048   tree type = TREE_TYPE (var);
1049
1050   if (!POINTER_TYPE_P (type)
1051       && !INTEGRAL_TYPE_P (type))
1052     return NULL;
1053
1054   if (!name_info (data, var)->iv)
1055     {
1056       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1057
1058       if (!bb
1059           || !flow_bb_inside_loop_p (data->current_loop, bb))
1060         set_iv (data, var, var, build_int_cst (type, 0));
1061     }
1062
1063   return name_info (data, var)->iv;
1064 }
1065
1066 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1067    not define a simple affine biv with nonzero step.  */
1068
1069 static tree
1070 determine_biv_step (gphi *phi)
1071 {
1072   struct loop *loop = gimple_bb (phi)->loop_father;
1073   tree name = PHI_RESULT (phi);
1074   affine_iv iv;
1075
1076   if (virtual_operand_p (name))
1077     return NULL_TREE;
1078
1079   if (!simple_iv (loop, loop, name, &iv, true))
1080     return NULL_TREE;
1081
1082   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1083 }
1084
1085 /* Return the first non-invariant ssa var found in EXPR.  */
1086
1087 static tree
1088 extract_single_var_from_expr (tree expr)
1089 {
1090   int i, n;
1091   tree tmp;
1092   enum tree_code code;
1093
1094   if (!expr || is_gimple_min_invariant (expr))
1095     return NULL;
1096
1097   code = TREE_CODE (expr);
1098   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1099     {
1100       n = TREE_OPERAND_LENGTH (expr);
1101       for (i = 0; i < n; i++)
1102         {
1103           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1104
1105           if (tmp)
1106             return tmp;
1107         }
1108     }
1109   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1110 }
1111
1112 /* Finds basic ivs.  */
1113
1114 static bool
1115 find_bivs (struct ivopts_data *data)
1116 {
1117   gphi *phi;
1118   tree step, type, base, stop;
1119   bool found = false;
1120   struct loop *loop = data->current_loop;
1121   gphi_iterator psi;
1122
1123   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1124     {
1125       phi = psi.phi ();
1126
1127       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1128         continue;
1129
1130       step = determine_biv_step (phi);
1131       if (!step)
1132         continue;
1133
1134       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1135       /* Stop expanding iv base at the first ssa var referred by iv step.
1136          Ideally we should stop at any ssa var, because that's expensive
1137          and unusual to happen, we just do it on the first one.
1138
1139          See PR64705 for the rationale.  */
1140       stop = extract_single_var_from_expr (step);
1141       base = expand_simple_operations (base, stop);
1142       if (contains_abnormal_ssa_name_p (base)
1143           || contains_abnormal_ssa_name_p (step))
1144         continue;
1145
1146       type = TREE_TYPE (PHI_RESULT (phi));
1147       base = fold_convert (type, base);
1148       if (step)
1149         {
1150           if (POINTER_TYPE_P (type))
1151             step = convert_to_ptrofftype (step);
1152           else
1153             step = fold_convert (type, step);
1154         }
1155
1156       set_iv (data, PHI_RESULT (phi), base, step);
1157       found = true;
1158     }
1159
1160   return found;
1161 }
1162
1163 /* Marks basic ivs.  */
1164
1165 static void
1166 mark_bivs (struct ivopts_data *data)
1167 {
1168   gphi *phi;
1169   gimple def;
1170   tree var;
1171   struct iv *iv, *incr_iv;
1172   struct loop *loop = data->current_loop;
1173   basic_block incr_bb;
1174   gphi_iterator psi;
1175
1176   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1177     {
1178       phi = psi.phi ();
1179
1180       iv = get_iv (data, PHI_RESULT (phi));
1181       if (!iv)
1182         continue;
1183
1184       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1185       def = SSA_NAME_DEF_STMT (var);
1186       /* Don't mark iv peeled from other one as biv.  */
1187       if (def
1188           && gimple_code (def) == GIMPLE_PHI
1189           && gimple_bb (def) == loop->header)
1190         continue;
1191
1192       incr_iv = get_iv (data, var);
1193       if (!incr_iv)
1194         continue;
1195
1196       /* If the increment is in the subloop, ignore it.  */
1197       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1198       if (incr_bb->loop_father != data->current_loop
1199           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1200         continue;
1201
1202       iv->biv_p = true;
1203       incr_iv->biv_p = true;
1204     }
1205 }
1206
1207 /* Checks whether STMT defines a linear induction variable and stores its
1208    parameters to IV.  */
1209
1210 static bool
1211 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1212 {
1213   tree lhs, stop;
1214   struct loop *loop = data->current_loop;
1215
1216   iv->base = NULL_TREE;
1217   iv->step = NULL_TREE;
1218
1219   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1220     return false;
1221
1222   lhs = gimple_assign_lhs (stmt);
1223   if (TREE_CODE (lhs) != SSA_NAME)
1224     return false;
1225
1226   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1227     return false;
1228
1229   /* Stop expanding iv base at the first ssa var referred by iv step.
1230      Ideally we should stop at any ssa var, because that's expensive
1231      and unusual to happen, we just do it on the first one.
1232
1233      See PR64705 for the rationale.  */
1234   stop = extract_single_var_from_expr (iv->step);
1235   iv->base = expand_simple_operations (iv->base, stop);
1236   if (contains_abnormal_ssa_name_p (iv->base)
1237       || contains_abnormal_ssa_name_p (iv->step))
1238     return false;
1239
1240   /* If STMT could throw, then do not consider STMT as defining a GIV.
1241      While this will suppress optimizations, we can not safely delete this
1242      GIV and associated statements, even if it appears it is not used.  */
1243   if (stmt_could_throw_p (stmt))
1244     return false;
1245
1246   return true;
1247 }
1248
1249 /* Finds general ivs in statement STMT.  */
1250
1251 static void
1252 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1253 {
1254   affine_iv iv;
1255
1256   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1257     return;
1258
1259   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1260 }
1261
1262 /* Finds general ivs in basic block BB.  */
1263
1264 static void
1265 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1266 {
1267   gimple_stmt_iterator bsi;
1268
1269   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1270     find_givs_in_stmt (data, gsi_stmt (bsi));
1271 }
1272
1273 /* Finds general ivs.  */
1274
1275 static void
1276 find_givs (struct ivopts_data *data)
1277 {
1278   struct loop *loop = data->current_loop;
1279   basic_block *body = get_loop_body_in_dom_order (loop);
1280   unsigned i;
1281
1282   for (i = 0; i < loop->num_nodes; i++)
1283     find_givs_in_bb (data, body[i]);
1284   free (body);
1285 }
1286
1287 /* For each ssa name defined in LOOP determines whether it is an induction
1288    variable and if so, its initial value and step.  */
1289
1290 static bool
1291 find_induction_variables (struct ivopts_data *data)
1292 {
1293   unsigned i;
1294   bitmap_iterator bi;
1295
1296   if (!find_bivs (data))
1297     return false;
1298
1299   find_givs (data);
1300   mark_bivs (data);
1301
1302   if (dump_file && (dump_flags & TDF_DETAILS))
1303     {
1304       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1305
1306       if (niter)
1307         {
1308           fprintf (dump_file, "  number of iterations ");
1309           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1310           if (!integer_zerop (niter->may_be_zero))
1311             {
1312               fprintf (dump_file, "; zero if ");
1313               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1314             }
1315           fprintf (dump_file, "\n\n");
1316         };
1317
1318       fprintf (dump_file, "Induction variables:\n\n");
1319
1320       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1321         {
1322           if (ver_info (data, i)->iv)
1323             dump_iv (dump_file, ver_info (data, i)->iv);
1324         }
1325     }
1326
1327   return true;
1328 }
1329
1330 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1331
1332 static struct iv_use *
1333 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1334             gimple stmt, enum use_type use_type)
1335 {
1336   struct iv_use *use = XCNEW (struct iv_use);
1337
1338   use->id = n_iv_uses (data);
1339   use->type = use_type;
1340   use->iv = iv;
1341   use->stmt = stmt;
1342   use->op_p = use_p;
1343   use->related_cands = BITMAP_ALLOC (NULL);
1344
1345   /* To avoid showing ssa name in the dumps, if it was not reset by the
1346      caller.  */
1347   iv->ssa_name = NULL_TREE;
1348
1349   if (dump_file && (dump_flags & TDF_DETAILS))
1350     dump_use (dump_file, use);
1351
1352   data->iv_uses.safe_push (use);
1353
1354   return use;
1355 }
1356
1357 /* Checks whether OP is a loop-level invariant and if so, records it.
1358    NONLINEAR_USE is true if the invariant is used in a way we do not
1359    handle specially.  */
1360
1361 static void
1362 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1363 {
1364   basic_block bb;
1365   struct version_info *info;
1366
1367   if (TREE_CODE (op) != SSA_NAME
1368       || virtual_operand_p (op))
1369     return;
1370
1371   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1372   if (bb
1373       && flow_bb_inside_loop_p (data->current_loop, bb))
1374     return;
1375
1376   info = name_info (data, op);
1377   info->name = op;
1378   info->has_nonlin_use |= nonlinear_use;
1379   if (!info->inv_id)
1380     info->inv_id = ++data->max_inv_id;
1381   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1382 }
1383
1384 /* Checks whether the use OP is interesting and if so, records it.  */
1385
1386 static struct iv_use *
1387 find_interesting_uses_op (struct ivopts_data *data, tree op)
1388 {
1389   struct iv *iv;
1390   struct iv *civ;
1391   gimple stmt;
1392   struct iv_use *use;
1393
1394   if (TREE_CODE (op) != SSA_NAME)
1395     return NULL;
1396
1397   iv = get_iv (data, op);
1398   if (!iv)
1399     return NULL;
1400
1401   if (iv->have_use_for)
1402     {
1403       use = iv_use (data, iv->use_id);
1404
1405       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1406       return use;
1407     }
1408
1409   if (integer_zerop (iv->step))
1410     {
1411       record_invariant (data, op, true);
1412       return NULL;
1413     }
1414   iv->have_use_for = true;
1415
1416   civ = XNEW (struct iv);
1417   *civ = *iv;
1418
1419   stmt = SSA_NAME_DEF_STMT (op);
1420   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1421               || is_gimple_assign (stmt));
1422
1423   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1424   iv->use_id = use->id;
1425
1426   return use;
1427 }
1428
1429 /* Given a condition in statement STMT, checks whether it is a compare
1430    of an induction variable and an invariant.  If this is the case,
1431    CONTROL_VAR is set to location of the iv, BOUND to the location of
1432    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1433    induction variable descriptions, and true is returned.  If this is not
1434    the case, CONTROL_VAR and BOUND are set to the arguments of the
1435    condition and false is returned.  */
1436
1437 static bool
1438 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1439                        tree **control_var, tree **bound,
1440                        struct iv **iv_var, struct iv **iv_bound)
1441 {
1442   /* The objects returned when COND has constant operands.  */
1443   static struct iv const_iv;
1444   static tree zero;
1445   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1446   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1447   bool ret = false;
1448
1449   if (gimple_code (stmt) == GIMPLE_COND)
1450     {
1451       gcond *cond_stmt = as_a <gcond *> (stmt);
1452       op0 = gimple_cond_lhs_ptr (cond_stmt);
1453       op1 = gimple_cond_rhs_ptr (cond_stmt);
1454     }
1455   else
1456     {
1457       op0 = gimple_assign_rhs1_ptr (stmt);
1458       op1 = gimple_assign_rhs2_ptr (stmt);
1459     }
1460
1461   zero = integer_zero_node;
1462   const_iv.step = integer_zero_node;
1463
1464   if (TREE_CODE (*op0) == SSA_NAME)
1465     iv0 = get_iv (data, *op0);
1466   if (TREE_CODE (*op1) == SSA_NAME)
1467     iv1 = get_iv (data, *op1);
1468
1469   /* Exactly one of the compared values must be an iv, and the other one must
1470      be an invariant.  */
1471   if (!iv0 || !iv1)
1472     goto end;
1473
1474   if (integer_zerop (iv0->step))
1475     {
1476       /* Control variable may be on the other side.  */
1477       tmp_op = op0; op0 = op1; op1 = tmp_op;
1478       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1479     }
1480   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1481
1482 end:
1483   if (control_var)
1484     *control_var = op0;
1485   if (iv_var)
1486     *iv_var = iv0;
1487   if (bound)
1488     *bound = op1;
1489   if (iv_bound)
1490     *iv_bound = iv1;
1491
1492   return ret;
1493 }
1494
1495 /* Checks whether the condition in STMT is interesting and if so,
1496    records it.  */
1497
1498 static void
1499 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1500 {
1501   tree *var_p, *bound_p;
1502   struct iv *var_iv, *civ;
1503
1504   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1505     {
1506       find_interesting_uses_op (data, *var_p);
1507       find_interesting_uses_op (data, *bound_p);
1508       return;
1509     }
1510
1511   civ = XNEW (struct iv);
1512   *civ = *var_iv;
1513   record_use (data, NULL, civ, stmt, USE_COMPARE);
1514 }
1515
1516 /* Returns the outermost loop EXPR is obviously invariant in
1517    relative to the loop LOOP, i.e. if all its operands are defined
1518    outside of the returned loop.  Returns NULL if EXPR is not
1519    even obviously invariant in LOOP.  */
1520
1521 struct loop *
1522 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1523 {
1524   basic_block def_bb;
1525   unsigned i, len;
1526
1527   if (is_gimple_min_invariant (expr))
1528     return current_loops->tree_root;
1529
1530   if (TREE_CODE (expr) == SSA_NAME)
1531     {
1532       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1533       if (def_bb)
1534         {
1535           if (flow_bb_inside_loop_p (loop, def_bb))
1536             return NULL;
1537           return superloop_at_depth (loop,
1538                                      loop_depth (def_bb->loop_father) + 1);
1539         }
1540
1541       return current_loops->tree_root;
1542     }
1543
1544   if (!EXPR_P (expr))
1545     return NULL;
1546
1547   unsigned maxdepth = 0;
1548   len = TREE_OPERAND_LENGTH (expr);
1549   for (i = 0; i < len; i++)
1550     {
1551       struct loop *ivloop;
1552       if (!TREE_OPERAND (expr, i))
1553         continue;
1554
1555       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1556       if (!ivloop)
1557         return NULL;
1558       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1559     }
1560
1561   return superloop_at_depth (loop, maxdepth);
1562 }
1563
1564 /* Returns true if expression EXPR is obviously invariant in LOOP,
1565    i.e. if all its operands are defined outside of the LOOP.  LOOP
1566    should not be the function body.  */
1567
1568 bool
1569 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1570 {
1571   basic_block def_bb;
1572   unsigned i, len;
1573
1574   gcc_assert (loop_depth (loop) > 0);
1575
1576   if (is_gimple_min_invariant (expr))
1577     return true;
1578
1579   if (TREE_CODE (expr) == SSA_NAME)
1580     {
1581       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1582       if (def_bb
1583           && flow_bb_inside_loop_p (loop, def_bb))
1584         return false;
1585
1586       return true;
1587     }
1588
1589   if (!EXPR_P (expr))
1590     return false;
1591
1592   len = TREE_OPERAND_LENGTH (expr);
1593   for (i = 0; i < len; i++)
1594     if (TREE_OPERAND (expr, i)
1595         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1596       return false;
1597
1598   return true;
1599 }
1600
1601 /* Cumulates the steps of indices into DATA and replaces their values with the
1602    initial ones.  Returns false when the value of the index cannot be determined.
1603    Callback for for_each_index.  */
1604
1605 struct ifs_ivopts_data
1606 {
1607   struct ivopts_data *ivopts_data;
1608   gimple stmt;
1609   tree step;
1610 };
1611
1612 static bool
1613 idx_find_step (tree base, tree *idx, void *data)
1614 {
1615   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1616   struct iv *iv;
1617   tree step, iv_base, iv_step, lbound, off;
1618   struct loop *loop = dta->ivopts_data->current_loop;
1619
1620   /* If base is a component ref, require that the offset of the reference
1621      be invariant.  */
1622   if (TREE_CODE (base) == COMPONENT_REF)
1623     {
1624       off = component_ref_field_offset (base);
1625       return expr_invariant_in_loop_p (loop, off);
1626     }
1627
1628   /* If base is array, first check whether we will be able to move the
1629      reference out of the loop (in order to take its address in strength
1630      reduction).  In order for this to work we need both lower bound
1631      and step to be loop invariants.  */
1632   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1633     {
1634       /* Moreover, for a range, the size needs to be invariant as well.  */
1635       if (TREE_CODE (base) == ARRAY_RANGE_REF
1636           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1637         return false;
1638
1639       step = array_ref_element_size (base);
1640       lbound = array_ref_low_bound (base);
1641
1642       if (!expr_invariant_in_loop_p (loop, step)
1643           || !expr_invariant_in_loop_p (loop, lbound))
1644         return false;
1645     }
1646
1647   if (TREE_CODE (*idx) != SSA_NAME)
1648     return true;
1649
1650   iv = get_iv (dta->ivopts_data, *idx);
1651   if (!iv)
1652     return false;
1653
1654   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1655           *&x[0], which is not folded and does not trigger the
1656           ARRAY_REF path below.  */
1657   *idx = iv->base;
1658
1659   if (integer_zerop (iv->step))
1660     return true;
1661
1662   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1663     {
1664       step = array_ref_element_size (base);
1665
1666       /* We only handle addresses whose step is an integer constant.  */
1667       if (TREE_CODE (step) != INTEGER_CST)
1668         return false;
1669     }
1670   else
1671     /* The step for pointer arithmetics already is 1 byte.  */
1672     step = size_one_node;
1673
1674   iv_base = iv->base;
1675   iv_step = iv->step;
1676   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1677                             sizetype, &iv_base, &iv_step, dta->stmt,
1678                             false))
1679     {
1680       /* The index might wrap.  */
1681       return false;
1682     }
1683
1684   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1685   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1686
1687   return true;
1688 }
1689
1690 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1691    object is passed to it in DATA.  */
1692
1693 static bool
1694 idx_record_use (tree base, tree *idx,
1695                 void *vdata)
1696 {
1697   struct ivopts_data *data = (struct ivopts_data *) vdata;
1698   find_interesting_uses_op (data, *idx);
1699   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1700     {
1701       find_interesting_uses_op (data, array_ref_element_size (base));
1702       find_interesting_uses_op (data, array_ref_low_bound (base));
1703     }
1704   return true;
1705 }
1706
1707 /* If we can prove that TOP = cst * BOT for some constant cst,
1708    store cst to MUL and return true.  Otherwise return false.
1709    The returned value is always sign-extended, regardless of the
1710    signedness of TOP and BOT.  */
1711
1712 static bool
1713 constant_multiple_of (tree top, tree bot, widest_int *mul)
1714 {
1715   tree mby;
1716   enum tree_code code;
1717   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1718   widest_int res, p0, p1;
1719
1720   STRIP_NOPS (top);
1721   STRIP_NOPS (bot);
1722
1723   if (operand_equal_p (top, bot, 0))
1724     {
1725       *mul = 1;
1726       return true;
1727     }
1728
1729   code = TREE_CODE (top);
1730   switch (code)
1731     {
1732     case MULT_EXPR:
1733       mby = TREE_OPERAND (top, 1);
1734       if (TREE_CODE (mby) != INTEGER_CST)
1735         return false;
1736
1737       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1738         return false;
1739
1740       *mul = wi::sext (res * wi::to_widest (mby), precision);
1741       return true;
1742
1743     case PLUS_EXPR:
1744     case MINUS_EXPR:
1745       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1746           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1747         return false;
1748
1749       if (code == MINUS_EXPR)
1750         p1 = -p1;
1751       *mul = wi::sext (p0 + p1, precision);
1752       return true;
1753
1754     case INTEGER_CST:
1755       if (TREE_CODE (bot) != INTEGER_CST)
1756         return false;
1757
1758       p0 = widest_int::from (top, SIGNED);
1759       p1 = widest_int::from (bot, SIGNED);
1760       if (p1 == 0)
1761         return false;
1762       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1763       return res == 0;
1764
1765     default:
1766       return false;
1767     }
1768 }
1769
1770 /* Return true if memory reference REF with step STEP may be unaligned.  */
1771
1772 static bool
1773 may_be_unaligned_p (tree ref, tree step)
1774 {
1775   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1776      thus they are not misaligned.  */
1777   if (TREE_CODE (ref) == TARGET_MEM_REF)
1778     return false;
1779
1780   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1781   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1782     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1783
1784   unsigned HOST_WIDE_INT bitpos;
1785   unsigned int ref_align;
1786   get_object_alignment_1 (ref, &ref_align, &bitpos);
1787   if (ref_align < align
1788       || (bitpos % align) != 0
1789       || (bitpos % BITS_PER_UNIT) != 0)
1790     return true;
1791
1792   unsigned int trailing_zeros = tree_ctz (step);
1793   if (trailing_zeros < HOST_BITS_PER_INT
1794       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1795     return true;
1796
1797   return false;
1798 }
1799
1800 /* Return true if EXPR may be non-addressable.   */
1801
1802 bool
1803 may_be_nonaddressable_p (tree expr)
1804 {
1805   switch (TREE_CODE (expr))
1806     {
1807     case TARGET_MEM_REF:
1808       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1809          target, thus they are always addressable.  */
1810       return false;
1811
1812     case COMPONENT_REF:
1813       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1814              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1815
1816     case VIEW_CONVERT_EXPR:
1817       /* This kind of view-conversions may wrap non-addressable objects
1818          and make them look addressable.  After some processing the
1819          non-addressability may be uncovered again, causing ADDR_EXPRs
1820          of inappropriate objects to be built.  */
1821       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1822           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1823         return true;
1824
1825       /* ... fall through ... */
1826
1827     case ARRAY_REF:
1828     case ARRAY_RANGE_REF:
1829       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1830
1831     CASE_CONVERT:
1832       return true;
1833
1834     default:
1835       break;
1836     }
1837
1838   return false;
1839 }
1840
1841 /* Finds addresses in *OP_P inside STMT.  */
1842
1843 static void
1844 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1845 {
1846   tree base = *op_p, step = size_zero_node;
1847   struct iv *civ;
1848   struct ifs_ivopts_data ifs_ivopts_data;
1849
1850   /* Do not play with volatile memory references.  A bit too conservative,
1851      perhaps, but safe.  */
1852   if (gimple_has_volatile_ops (stmt))
1853     goto fail;
1854
1855   /* Ignore bitfields for now.  Not really something terribly complicated
1856      to handle.  TODO.  */
1857   if (TREE_CODE (base) == BIT_FIELD_REF)
1858     goto fail;
1859
1860   base = unshare_expr (base);
1861
1862   if (TREE_CODE (base) == TARGET_MEM_REF)
1863     {
1864       tree type = build_pointer_type (TREE_TYPE (base));
1865       tree astep;
1866
1867       if (TMR_BASE (base)
1868           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1869         {
1870           civ = get_iv (data, TMR_BASE (base));
1871           if (!civ)
1872             goto fail;
1873
1874           TMR_BASE (base) = civ->base;
1875           step = civ->step;
1876         }
1877       if (TMR_INDEX2 (base)
1878           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1879         {
1880           civ = get_iv (data, TMR_INDEX2 (base));
1881           if (!civ)
1882             goto fail;
1883
1884           TMR_INDEX2 (base) = civ->base;
1885           step = civ->step;
1886         }
1887       if (TMR_INDEX (base)
1888           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1889         {
1890           civ = get_iv (data, TMR_INDEX (base));
1891           if (!civ)
1892             goto fail;
1893
1894           TMR_INDEX (base) = civ->base;
1895           astep = civ->step;
1896
1897           if (astep)
1898             {
1899               if (TMR_STEP (base))
1900                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1901
1902               step = fold_build2 (PLUS_EXPR, type, step, astep);
1903             }
1904         }
1905
1906       if (integer_zerop (step))
1907         goto fail;
1908       base = tree_mem_ref_addr (type, base);
1909     }
1910   else
1911     {
1912       ifs_ivopts_data.ivopts_data = data;
1913       ifs_ivopts_data.stmt = stmt;
1914       ifs_ivopts_data.step = size_zero_node;
1915       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1916           || integer_zerop (ifs_ivopts_data.step))
1917         goto fail;
1918       step = ifs_ivopts_data.step;
1919
1920       /* Check that the base expression is addressable.  This needs
1921          to be done after substituting bases of IVs into it.  */
1922       if (may_be_nonaddressable_p (base))
1923         goto fail;
1924
1925       /* Moreover, on strict alignment platforms, check that it is
1926          sufficiently aligned.  */
1927       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1928         goto fail;
1929
1930       base = build_fold_addr_expr (base);
1931
1932       /* Substituting bases of IVs into the base expression might
1933          have caused folding opportunities.  */
1934       if (TREE_CODE (base) == ADDR_EXPR)
1935         {
1936           tree *ref = &TREE_OPERAND (base, 0);
1937           while (handled_component_p (*ref))
1938             ref = &TREE_OPERAND (*ref, 0);
1939           if (TREE_CODE (*ref) == MEM_REF)
1940             {
1941               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1942                                       TREE_OPERAND (*ref, 0),
1943                                       TREE_OPERAND (*ref, 1));
1944               if (tem)
1945                 *ref = tem;
1946             }
1947         }
1948     }
1949
1950   civ = alloc_iv (base, step);
1951   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1952   return;
1953
1954 fail:
1955   for_each_index (op_p, idx_record_use, data);
1956 }
1957
1958 /* Finds and records invariants used in STMT.  */
1959
1960 static void
1961 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1962 {
1963   ssa_op_iter iter;
1964   use_operand_p use_p;
1965   tree op;
1966
1967   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1968     {
1969       op = USE_FROM_PTR (use_p);
1970       record_invariant (data, op, false);
1971     }
1972 }
1973
1974 /* Finds interesting uses of induction variables in the statement STMT.  */
1975
1976 static void
1977 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1978 {
1979   struct iv *iv;
1980   tree op, *lhs, *rhs;
1981   ssa_op_iter iter;
1982   use_operand_p use_p;
1983   enum tree_code code;
1984
1985   find_invariants_stmt (data, stmt);
1986
1987   if (gimple_code (stmt) == GIMPLE_COND)
1988     {
1989       find_interesting_uses_cond (data, stmt);
1990       return;
1991     }
1992
1993   if (is_gimple_assign (stmt))
1994     {
1995       lhs = gimple_assign_lhs_ptr (stmt);
1996       rhs = gimple_assign_rhs1_ptr (stmt);
1997
1998       if (TREE_CODE (*lhs) == SSA_NAME)
1999         {
2000           /* If the statement defines an induction variable, the uses are not
2001              interesting by themselves.  */
2002
2003           iv = get_iv (data, *lhs);
2004
2005           if (iv && !integer_zerop (iv->step))
2006             return;
2007         }
2008
2009       code = gimple_assign_rhs_code (stmt);
2010       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2011           && (REFERENCE_CLASS_P (*rhs)
2012               || is_gimple_val (*rhs)))
2013         {
2014           if (REFERENCE_CLASS_P (*rhs))
2015             find_interesting_uses_address (data, stmt, rhs);
2016           else
2017             find_interesting_uses_op (data, *rhs);
2018
2019           if (REFERENCE_CLASS_P (*lhs))
2020             find_interesting_uses_address (data, stmt, lhs);
2021           return;
2022         }
2023       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2024         {
2025           find_interesting_uses_cond (data, stmt);
2026           return;
2027         }
2028
2029       /* TODO -- we should also handle address uses of type
2030
2031          memory = call (whatever);
2032
2033          and
2034
2035          call (memory).  */
2036     }
2037
2038   if (gimple_code (stmt) == GIMPLE_PHI
2039       && gimple_bb (stmt) == data->current_loop->header)
2040     {
2041       iv = get_iv (data, PHI_RESULT (stmt));
2042
2043       if (iv && !integer_zerop (iv->step))
2044         return;
2045     }
2046
2047   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2048     {
2049       op = USE_FROM_PTR (use_p);
2050
2051       if (TREE_CODE (op) != SSA_NAME)
2052         continue;
2053
2054       iv = get_iv (data, op);
2055       if (!iv)
2056         continue;
2057
2058       find_interesting_uses_op (data, op);
2059     }
2060 }
2061
2062 /* Finds interesting uses of induction variables outside of loops
2063    on loop exit edge EXIT.  */
2064
2065 static void
2066 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2067 {
2068   gphi *phi;
2069   gphi_iterator psi;
2070   tree def;
2071
2072   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2073     {
2074       phi = psi.phi ();
2075       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2076       if (!virtual_operand_p (def))
2077         find_interesting_uses_op (data, def);
2078     }
2079 }
2080
2081 /* Finds uses of the induction variables that are interesting.  */
2082
2083 static void
2084 find_interesting_uses (struct ivopts_data *data)
2085 {
2086   basic_block bb;
2087   gimple_stmt_iterator bsi;
2088   basic_block *body = get_loop_body (data->current_loop);
2089   unsigned i;
2090   struct version_info *info;
2091   edge e;
2092
2093   if (dump_file && (dump_flags & TDF_DETAILS))
2094     fprintf (dump_file, "Uses:\n\n");
2095
2096   for (i = 0; i < data->current_loop->num_nodes; i++)
2097     {
2098       edge_iterator ei;
2099       bb = body[i];
2100
2101       FOR_EACH_EDGE (e, ei, bb->succs)
2102         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2103             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2104           find_interesting_uses_outside (data, e);
2105
2106       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2107         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2108       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2109         if (!is_gimple_debug (gsi_stmt (bsi)))
2110           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2111     }
2112
2113   if (dump_file && (dump_flags & TDF_DETAILS))
2114     {
2115       bitmap_iterator bi;
2116
2117       fprintf (dump_file, "\n");
2118
2119       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2120         {
2121           info = ver_info (data, i);
2122           if (info->inv_id)
2123             {
2124               fprintf (dump_file, "  ");
2125               print_generic_expr (dump_file, info->name, TDF_SLIM);
2126               fprintf (dump_file, " is invariant (%d)%s\n",
2127                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2128             }
2129         }
2130
2131       fprintf (dump_file, "\n");
2132     }
2133
2134   free (body);
2135 }
2136
2137 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2138    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2139    we are at the top-level of the processed address.  */
2140
2141 static tree
2142 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2143                 HOST_WIDE_INT *offset)
2144 {
2145   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2146   enum tree_code code;
2147   tree type, orig_type = TREE_TYPE (expr);
2148   HOST_WIDE_INT off0, off1, st;
2149   tree orig_expr = expr;
2150
2151   STRIP_NOPS (expr);
2152
2153   type = TREE_TYPE (expr);
2154   code = TREE_CODE (expr);
2155   *offset = 0;
2156
2157   switch (code)
2158     {
2159     case INTEGER_CST:
2160       if (!cst_and_fits_in_hwi (expr)
2161           || integer_zerop (expr))
2162         return orig_expr;
2163
2164       *offset = int_cst_value (expr);
2165       return build_int_cst (orig_type, 0);
2166
2167     case POINTER_PLUS_EXPR:
2168     case PLUS_EXPR:
2169     case MINUS_EXPR:
2170       op0 = TREE_OPERAND (expr, 0);
2171       op1 = TREE_OPERAND (expr, 1);
2172
2173       op0 = strip_offset_1 (op0, false, false, &off0);
2174       op1 = strip_offset_1 (op1, false, false, &off1);
2175
2176       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2177       if (op0 == TREE_OPERAND (expr, 0)
2178           && op1 == TREE_OPERAND (expr, 1))
2179         return orig_expr;
2180
2181       if (integer_zerop (op1))
2182         expr = op0;
2183       else if (integer_zerop (op0))
2184         {
2185           if (code == MINUS_EXPR)
2186             expr = fold_build1 (NEGATE_EXPR, type, op1);
2187           else
2188             expr = op1;
2189         }
2190       else
2191         expr = fold_build2 (code, type, op0, op1);
2192
2193       return fold_convert (orig_type, expr);
2194
2195     case MULT_EXPR:
2196       op1 = TREE_OPERAND (expr, 1);
2197       if (!cst_and_fits_in_hwi (op1))
2198         return orig_expr;
2199
2200       op0 = TREE_OPERAND (expr, 0);
2201       op0 = strip_offset_1 (op0, false, false, &off0);
2202       if (op0 == TREE_OPERAND (expr, 0))
2203         return orig_expr;
2204
2205       *offset = off0 * int_cst_value (op1);
2206       if (integer_zerop (op0))
2207         expr = op0;
2208       else
2209         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2210
2211       return fold_convert (orig_type, expr);
2212
2213     case ARRAY_REF:
2214     case ARRAY_RANGE_REF:
2215       if (!inside_addr)
2216         return orig_expr;
2217
2218       step = array_ref_element_size (expr);
2219       if (!cst_and_fits_in_hwi (step))
2220         break;
2221
2222       st = int_cst_value (step);
2223       op1 = TREE_OPERAND (expr, 1);
2224       op1 = strip_offset_1 (op1, false, false, &off1);
2225       *offset = off1 * st;
2226
2227       if (top_compref
2228           && integer_zerop (op1))
2229         {
2230           /* Strip the component reference completely.  */
2231           op0 = TREE_OPERAND (expr, 0);
2232           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2233           *offset += off0;
2234           return op0;
2235         }
2236       break;
2237
2238     case COMPONENT_REF:
2239       {
2240         tree field;
2241
2242         if (!inside_addr)
2243           return orig_expr;
2244
2245         tmp = component_ref_field_offset (expr);
2246         field = TREE_OPERAND (expr, 1);
2247         if (top_compref
2248             && cst_and_fits_in_hwi (tmp)
2249             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2250           {
2251             HOST_WIDE_INT boffset, abs_off;
2252
2253             /* Strip the component reference completely.  */
2254             op0 = TREE_OPERAND (expr, 0);
2255             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2256             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2257             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2258             if (boffset < 0)
2259               abs_off = -abs_off;
2260
2261             *offset = off0 + int_cst_value (tmp) + abs_off;
2262             return op0;
2263           }
2264       }
2265       break;
2266
2267     case ADDR_EXPR:
2268       op0 = TREE_OPERAND (expr, 0);
2269       op0 = strip_offset_1 (op0, true, true, &off0);
2270       *offset += off0;
2271
2272       if (op0 == TREE_OPERAND (expr, 0))
2273         return orig_expr;
2274
2275       expr = build_fold_addr_expr (op0);
2276       return fold_convert (orig_type, expr);
2277
2278     case MEM_REF:
2279       /* ???  Offset operand?  */
2280       inside_addr = false;
2281       break;
2282
2283     default:
2284       return orig_expr;
2285     }
2286
2287   /* Default handling of expressions for that we want to recurse into
2288      the first operand.  */
2289   op0 = TREE_OPERAND (expr, 0);
2290   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2291   *offset += off0;
2292
2293   if (op0 == TREE_OPERAND (expr, 0)
2294       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2295     return orig_expr;
2296
2297   expr = copy_node (expr);
2298   TREE_OPERAND (expr, 0) = op0;
2299   if (op1)
2300     TREE_OPERAND (expr, 1) = op1;
2301
2302   /* Inside address, we might strip the top level component references,
2303      thus changing type of the expression.  Handling of ADDR_EXPR
2304      will fix that.  */
2305   expr = fold_convert (orig_type, expr);
2306
2307   return expr;
2308 }
2309
2310 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2311
2312 static tree
2313 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2314 {
2315   HOST_WIDE_INT off;
2316   tree core = strip_offset_1 (expr, false, false, &off);
2317   *offset = off;
2318   return core;
2319 }
2320
2321 /* Returns variant of TYPE that can be used as base for different uses.
2322    We return unsigned type with the same precision, which avoids problems
2323    with overflows.  */
2324
2325 static tree
2326 generic_type_for (tree type)
2327 {
2328   if (POINTER_TYPE_P (type))
2329     return unsigned_type_for (type);
2330
2331   if (TYPE_UNSIGNED (type))
2332     return type;
2333
2334   return unsigned_type_for (type);
2335 }
2336
2337 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2338    the bitmap to that we should store it.  */
2339
2340 static struct ivopts_data *fd_ivopts_data;
2341 static tree
2342 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2343 {
2344   bitmap *depends_on = (bitmap *) data;
2345   struct version_info *info;
2346
2347   if (TREE_CODE (*expr_p) != SSA_NAME)
2348     return NULL_TREE;
2349   info = name_info (fd_ivopts_data, *expr_p);
2350
2351   if (!info->inv_id || info->has_nonlin_use)
2352     return NULL_TREE;
2353
2354   if (!*depends_on)
2355     *depends_on = BITMAP_ALLOC (NULL);
2356   bitmap_set_bit (*depends_on, info->inv_id);
2357
2358   return NULL_TREE;
2359 }
2360
2361 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2362    position to POS.  If USE is not NULL, the candidate is set as related to
2363    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2364    replacement of the final value of the iv by a direct computation.  */
2365
2366 static struct iv_cand *
2367 add_candidate_1 (struct ivopts_data *data,
2368                  tree base, tree step, bool important, enum iv_position pos,
2369                  struct iv_use *use, gimple incremented_at)
2370 {
2371   unsigned i;
2372   struct iv_cand *cand = NULL;
2373   tree type, orig_type;
2374
2375   /* For non-original variables, make sure their values are computed in a type
2376      that does not invoke undefined behavior on overflows (since in general,
2377      we cannot prove that these induction variables are non-wrapping).  */
2378   if (pos != IP_ORIGINAL)
2379     {
2380       orig_type = TREE_TYPE (base);
2381       type = generic_type_for (orig_type);
2382       if (type != orig_type)
2383         {
2384           base = fold_convert (type, base);
2385           step = fold_convert (type, step);
2386         }
2387     }
2388
2389   for (i = 0; i < n_iv_cands (data); i++)
2390     {
2391       cand = iv_cand (data, i);
2392
2393       if (cand->pos != pos)
2394         continue;
2395
2396       if (cand->incremented_at != incremented_at
2397           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2398               && cand->ainc_use != use))
2399         continue;
2400
2401       if (!cand->iv)
2402         {
2403           if (!base && !step)
2404             break;
2405
2406           continue;
2407         }
2408
2409       if (!base && !step)
2410         continue;
2411
2412       if (operand_equal_p (base, cand->iv->base, 0)
2413           && operand_equal_p (step, cand->iv->step, 0)
2414           && (TYPE_PRECISION (TREE_TYPE (base))
2415               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2416         break;
2417     }
2418
2419   if (i == n_iv_cands (data))
2420     {
2421       cand = XCNEW (struct iv_cand);
2422       cand->id = i;
2423
2424       if (!base && !step)
2425         cand->iv = NULL;
2426       else
2427         cand->iv = alloc_iv (base, step);
2428
2429       cand->pos = pos;
2430       if (pos != IP_ORIGINAL && cand->iv)
2431         {
2432           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2433           cand->var_after = cand->var_before;
2434         }
2435       cand->important = important;
2436       cand->incremented_at = incremented_at;
2437       data->iv_candidates.safe_push (cand);
2438
2439       if (step
2440           && TREE_CODE (step) != INTEGER_CST)
2441         {
2442           fd_ivopts_data = data;
2443           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2444         }
2445
2446       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2447         cand->ainc_use = use;
2448       else
2449         cand->ainc_use = NULL;
2450
2451       if (dump_file && (dump_flags & TDF_DETAILS))
2452         dump_cand (dump_file, cand);
2453     }
2454
2455   if (important && !cand->important)
2456     {
2457       cand->important = true;
2458       if (dump_file && (dump_flags & TDF_DETAILS))
2459         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2460     }
2461
2462   if (use)
2463     {
2464       bitmap_set_bit (use->related_cands, i);
2465       if (dump_file && (dump_flags & TDF_DETAILS))
2466         fprintf (dump_file, "Candidate %d is related to use %d\n",
2467                  cand->id, use->id);
2468     }
2469
2470   return cand;
2471 }
2472
2473 /* Returns true if incrementing the induction variable at the end of the LOOP
2474    is allowed.
2475
2476    The purpose is to avoid splitting latch edge with a biv increment, thus
2477    creating a jump, possibly confusing other optimization passes and leaving
2478    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2479    is not available (so we do not have a better alternative), or if the latch
2480    edge is already nonempty.  */
2481
2482 static bool
2483 allow_ip_end_pos_p (struct loop *loop)
2484 {
2485   if (!ip_normal_pos (loop))
2486     return true;
2487
2488   if (!empty_block_p (ip_end_pos (loop)))
2489     return true;
2490
2491   return false;
2492 }
2493
2494 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2495    Important field is set to IMPORTANT.  */
2496
2497 static void
2498 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2499                         bool important, struct iv_use *use)
2500 {
2501   basic_block use_bb = gimple_bb (use->stmt);
2502   machine_mode mem_mode;
2503   unsigned HOST_WIDE_INT cstepi;
2504
2505   /* If we insert the increment in any position other than the standard
2506      ones, we must ensure that it is incremented once per iteration.
2507      It must not be in an inner nested loop, or one side of an if
2508      statement.  */
2509   if (use_bb->loop_father != data->current_loop
2510       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2511       || stmt_could_throw_p (use->stmt)
2512       || !cst_and_fits_in_hwi (step))
2513     return;
2514
2515   cstepi = int_cst_value (step);
2516
2517   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2518   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2519         || USE_STORE_PRE_INCREMENT (mem_mode))
2520        && GET_MODE_SIZE (mem_mode) == cstepi)
2521       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2522            || USE_STORE_PRE_DECREMENT (mem_mode))
2523           && GET_MODE_SIZE (mem_mode) == -cstepi))
2524     {
2525       enum tree_code code = MINUS_EXPR;
2526       tree new_base;
2527       tree new_step = step;
2528
2529       if (POINTER_TYPE_P (TREE_TYPE (base)))
2530         {
2531           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2532           code = POINTER_PLUS_EXPR;
2533         }
2534       else
2535         new_step = fold_convert (TREE_TYPE (base), new_step);
2536       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2537       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2538                        use->stmt);
2539     }
2540   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2541         || USE_STORE_POST_INCREMENT (mem_mode))
2542        && GET_MODE_SIZE (mem_mode) == cstepi)
2543       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2544            || USE_STORE_POST_DECREMENT (mem_mode))
2545           && GET_MODE_SIZE (mem_mode) == -cstepi))
2546     {
2547       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2548                        use->stmt);
2549     }
2550 }
2551
2552 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2553    position to POS.  If USE is not NULL, the candidate is set as related to
2554    it.  The candidate computation is scheduled on all available positions.  */
2555
2556 static void
2557 add_candidate (struct ivopts_data *data,
2558                tree base, tree step, bool important, struct iv_use *use)
2559 {
2560   if (ip_normal_pos (data->current_loop))
2561     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2562   if (ip_end_pos (data->current_loop)
2563       && allow_ip_end_pos_p (data->current_loop))
2564     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2565
2566   if (use != NULL && use->type == USE_ADDRESS)
2567     add_autoinc_candidates (data, base, step, important, use);
2568 }
2569
2570 /* Adds standard iv candidates.  */
2571
2572 static void
2573 add_standard_iv_candidates (struct ivopts_data *data)
2574 {
2575   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2576
2577   /* The same for a double-integer type if it is still fast enough.  */
2578   if (TYPE_PRECISION
2579         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2580       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2581     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2582                    build_int_cst (long_integer_type_node, 1), true, NULL);
2583
2584   /* The same for a double-integer type if it is still fast enough.  */
2585   if (TYPE_PRECISION
2586         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2587       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2588     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2589                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2590 }
2591
2592
2593 /* Adds candidates bases on the old induction variable IV.  */
2594
2595 static void
2596 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2597 {
2598   gimple phi;
2599   tree def;
2600   struct iv_cand *cand;
2601
2602   add_candidate (data, iv->base, iv->step, true, NULL);
2603
2604   /* The same, but with initial value zero.  */
2605   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2606     add_candidate (data, size_int (0), iv->step, true, NULL);
2607   else
2608     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2609                    iv->step, true, NULL);
2610
2611   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2612   if (gimple_code (phi) == GIMPLE_PHI)
2613     {
2614       /* Additionally record the possibility of leaving the original iv
2615          untouched.  */
2616       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2617       /* Don't add candidate if it's from another PHI node because
2618          it's an affine iv appearing in the form of PEELED_CHREC.  */
2619       phi = SSA_NAME_DEF_STMT (def);
2620       if (gimple_code (phi) != GIMPLE_PHI)
2621         {
2622           cand = add_candidate_1 (data,
2623                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2624                                   SSA_NAME_DEF_STMT (def));
2625           cand->var_before = iv->ssa_name;
2626           cand->var_after = def;
2627         }
2628       else
2629         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2630     }
2631 }
2632
2633 /* Adds candidates based on the old induction variables.  */
2634
2635 static void
2636 add_old_ivs_candidates (struct ivopts_data *data)
2637 {
2638   unsigned i;
2639   struct iv *iv;
2640   bitmap_iterator bi;
2641
2642   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2643     {
2644       iv = ver_info (data, i)->iv;
2645       if (iv && iv->biv_p && !integer_zerop (iv->step))
2646         add_old_iv_candidates (data, iv);
2647     }
2648 }
2649
2650 /* Adds candidates based on the value of the induction variable IV and USE.  */
2651
2652 static void
2653 add_iv_value_candidates (struct ivopts_data *data,
2654                          struct iv *iv, struct iv_use *use)
2655 {
2656   unsigned HOST_WIDE_INT offset;
2657   tree base;
2658   tree basetype;
2659
2660   add_candidate (data, iv->base, iv->step, false, use);
2661
2662   /* The same, but with initial value zero.  Make such variable important,
2663      since it is generic enough so that possibly many uses may be based
2664      on it.  */
2665   basetype = TREE_TYPE (iv->base);
2666   if (POINTER_TYPE_P (basetype))
2667     basetype = sizetype;
2668   add_candidate (data, build_int_cst (basetype, 0),
2669                  iv->step, true, use);
2670
2671   /* Third, try removing the constant offset.  Make sure to even
2672      add a candidate for &a[0] vs. (T *)&a.  */
2673   base = strip_offset (iv->base, &offset);
2674   if (offset
2675       || base != iv->base)
2676     add_candidate (data, base, iv->step, false, use);
2677 }
2678
2679 /* Adds candidates based on the uses.  */
2680
2681 static void
2682 add_derived_ivs_candidates (struct ivopts_data *data)
2683 {
2684   unsigned i;
2685
2686   for (i = 0; i < n_iv_uses (data); i++)
2687     {
2688       struct iv_use *use = iv_use (data, i);
2689
2690       if (!use)
2691         continue;
2692
2693       switch (use->type)
2694         {
2695         case USE_NONLINEAR_EXPR:
2696         case USE_COMPARE:
2697         case USE_ADDRESS:
2698           /* Just add the ivs based on the value of the iv used here.  */
2699           add_iv_value_candidates (data, use->iv, use);
2700           break;
2701
2702         default:
2703           gcc_unreachable ();
2704         }
2705     }
2706 }
2707
2708 /* Record important candidates and add them to related_cands bitmaps
2709    if needed.  */
2710
2711 static void
2712 record_important_candidates (struct ivopts_data *data)
2713 {
2714   unsigned i;
2715   struct iv_use *use;
2716
2717   for (i = 0; i < n_iv_cands (data); i++)
2718     {
2719       struct iv_cand *cand = iv_cand (data, i);
2720
2721       if (cand->important)
2722         bitmap_set_bit (data->important_candidates, i);
2723     }
2724
2725   data->consider_all_candidates = (n_iv_cands (data)
2726                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2727
2728   if (data->consider_all_candidates)
2729     {
2730       /* We will not need "related_cands" bitmaps in this case,
2731          so release them to decrease peak memory consumption.  */
2732       for (i = 0; i < n_iv_uses (data); i++)
2733         {
2734           use = iv_use (data, i);
2735           BITMAP_FREE (use->related_cands);
2736         }
2737     }
2738   else
2739     {
2740       /* Add important candidates to the related_cands bitmaps.  */
2741       for (i = 0; i < n_iv_uses (data); i++)
2742         bitmap_ior_into (iv_use (data, i)->related_cands,
2743                          data->important_candidates);
2744     }
2745 }
2746
2747 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2748    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2749    we allocate a simple list to every use.  */
2750
2751 static void
2752 alloc_use_cost_map (struct ivopts_data *data)
2753 {
2754   unsigned i, size, s;
2755
2756   for (i = 0; i < n_iv_uses (data); i++)
2757     {
2758       struct iv_use *use = iv_use (data, i);
2759
2760       if (data->consider_all_candidates)
2761         size = n_iv_cands (data);
2762       else
2763         {
2764           s = bitmap_count_bits (use->related_cands);
2765
2766           /* Round up to the power of two, so that moduling by it is fast.  */
2767           size = s ? (1 << ceil_log2 (s)) : 1;
2768         }
2769
2770       use->n_map_members = size;
2771       use->cost_map = XCNEWVEC (struct cost_pair, size);
2772     }
2773 }
2774
2775 /* Returns description of computation cost of expression whose runtime
2776    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2777
2778 static comp_cost
2779 new_cost (unsigned runtime, unsigned complexity)
2780 {
2781   comp_cost cost;
2782
2783   cost.cost = runtime;
2784   cost.complexity = complexity;
2785
2786   return cost;
2787 }
2788
2789 /* Adds costs COST1 and COST2.  */
2790
2791 static comp_cost
2792 add_costs (comp_cost cost1, comp_cost cost2)
2793 {
2794   cost1.cost += cost2.cost;
2795   cost1.complexity += cost2.complexity;
2796
2797   return cost1;
2798 }
2799 /* Subtracts costs COST1 and COST2.  */
2800
2801 static comp_cost
2802 sub_costs (comp_cost cost1, comp_cost cost2)
2803 {
2804   cost1.cost -= cost2.cost;
2805   cost1.complexity -= cost2.complexity;
2806
2807   return cost1;
2808 }
2809
2810 /* Returns a negative number if COST1 < COST2, a positive number if
2811    COST1 > COST2, and 0 if COST1 = COST2.  */
2812
2813 static int
2814 compare_costs (comp_cost cost1, comp_cost cost2)
2815 {
2816   if (cost1.cost == cost2.cost)
2817     return cost1.complexity - cost2.complexity;
2818
2819   return cost1.cost - cost2.cost;
2820 }
2821
2822 /* Returns true if COST is infinite.  */
2823
2824 static bool
2825 infinite_cost_p (comp_cost cost)
2826 {
2827   return cost.cost == INFTY;
2828 }
2829
2830 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2831    on invariants DEPENDS_ON and that the value used in expressing it
2832    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2833
2834 static void
2835 set_use_iv_cost (struct ivopts_data *data,
2836                  struct iv_use *use, struct iv_cand *cand,
2837                  comp_cost cost, bitmap depends_on, tree value,
2838                  enum tree_code comp, int inv_expr_id)
2839 {
2840   unsigned i, s;
2841
2842   if (infinite_cost_p (cost))
2843     {
2844       BITMAP_FREE (depends_on);
2845       return;
2846     }
2847
2848   if (data->consider_all_candidates)
2849     {
2850       use->cost_map[cand->id].cand = cand;
2851       use->cost_map[cand->id].cost = cost;
2852       use->cost_map[cand->id].depends_on = depends_on;
2853       use->cost_map[cand->id].value = value;
2854       use->cost_map[cand->id].comp = comp;
2855       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2856       return;
2857     }
2858
2859   /* n_map_members is a power of two, so this computes modulo.  */
2860   s = cand->id & (use->n_map_members - 1);
2861   for (i = s; i < use->n_map_members; i++)
2862     if (!use->cost_map[i].cand)
2863       goto found;
2864   for (i = 0; i < s; i++)
2865     if (!use->cost_map[i].cand)
2866       goto found;
2867
2868   gcc_unreachable ();
2869
2870 found:
2871   use->cost_map[i].cand = cand;
2872   use->cost_map[i].cost = cost;
2873   use->cost_map[i].depends_on = depends_on;
2874   use->cost_map[i].value = value;
2875   use->cost_map[i].comp = comp;
2876   use->cost_map[i].inv_expr_id = inv_expr_id;
2877 }
2878
2879 /* Gets cost of (USE, CANDIDATE) pair.  */
2880
2881 static struct cost_pair *
2882 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2883                  struct iv_cand *cand)
2884 {
2885   unsigned i, s;
2886   struct cost_pair *ret;
2887
2888   if (!cand)
2889     return NULL;
2890
2891   if (data->consider_all_candidates)
2892     {
2893       ret = use->cost_map + cand->id;
2894       if (!ret->cand)
2895         return NULL;
2896
2897       return ret;
2898     }
2899
2900   /* n_map_members is a power of two, so this computes modulo.  */
2901   s = cand->id & (use->n_map_members - 1);
2902   for (i = s; i < use->n_map_members; i++)
2903     if (use->cost_map[i].cand == cand)
2904       return use->cost_map + i;
2905     else if (use->cost_map[i].cand == NULL)
2906       return NULL;
2907   for (i = 0; i < s; i++)
2908     if (use->cost_map[i].cand == cand)
2909       return use->cost_map + i;
2910     else if (use->cost_map[i].cand == NULL)
2911       return NULL;
2912
2913   return NULL;
2914 }
2915
2916 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2917 static rtx
2918 produce_memory_decl_rtl (tree obj, int *regno)
2919 {
2920   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2921   machine_mode address_mode = targetm.addr_space.address_mode (as);
2922   rtx x;
2923
2924   gcc_assert (obj);
2925   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2926     {
2927       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2928       x = gen_rtx_SYMBOL_REF (address_mode, name);
2929       SET_SYMBOL_REF_DECL (x, obj);
2930       x = gen_rtx_MEM (DECL_MODE (obj), x);
2931       set_mem_addr_space (x, as);
2932       targetm.encode_section_info (obj, x, true);
2933     }
2934   else
2935     {
2936       x = gen_raw_REG (address_mode, (*regno)++);
2937       x = gen_rtx_MEM (DECL_MODE (obj), x);
2938       set_mem_addr_space (x, as);
2939     }
2940
2941   return x;
2942 }
2943
2944 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2945    walk_tree.  DATA contains the actual fake register number.  */
2946
2947 static tree
2948 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2949 {
2950   tree obj = NULL_TREE;
2951   rtx x = NULL_RTX;
2952   int *regno = (int *) data;
2953
2954   switch (TREE_CODE (*expr_p))
2955     {
2956     case ADDR_EXPR:
2957       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2958            handled_component_p (*expr_p);
2959            expr_p = &TREE_OPERAND (*expr_p, 0))
2960         continue;
2961       obj = *expr_p;
2962       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2963         x = produce_memory_decl_rtl (obj, regno);
2964       break;
2965
2966     case SSA_NAME:
2967       *ws = 0;
2968       obj = SSA_NAME_VAR (*expr_p);
2969       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2970       if (!obj)
2971         return NULL_TREE;
2972       if (!DECL_RTL_SET_P (obj))
2973         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2974       break;
2975
2976     case VAR_DECL:
2977     case PARM_DECL:
2978     case RESULT_DECL:
2979       *ws = 0;
2980       obj = *expr_p;
2981
2982       if (DECL_RTL_SET_P (obj))
2983         break;
2984
2985       if (DECL_MODE (obj) == BLKmode)
2986         x = produce_memory_decl_rtl (obj, regno);
2987       else
2988         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2989
2990       break;
2991
2992     default:
2993       break;
2994     }
2995
2996   if (x)
2997     {
2998       decl_rtl_to_reset.safe_push (obj);
2999       SET_DECL_RTL (obj, x);
3000     }
3001
3002   return NULL_TREE;
3003 }
3004
3005 /* Determines cost of the computation of EXPR.  */
3006
3007 static unsigned
3008 computation_cost (tree expr, bool speed)
3009 {
3010   rtx_insn *seq;
3011   rtx rslt;
3012   tree type = TREE_TYPE (expr);
3013   unsigned cost;
3014   /* Avoid using hard regs in ways which may be unsupported.  */
3015   int regno = LAST_VIRTUAL_REGISTER + 1;
3016   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3017   enum node_frequency real_frequency = node->frequency;
3018
3019   node->frequency = NODE_FREQUENCY_NORMAL;
3020   crtl->maybe_hot_insn_p = speed;
3021   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3022   start_sequence ();
3023   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3024   seq = get_insns ();
3025   end_sequence ();
3026   default_rtl_profile ();
3027   node->frequency = real_frequency;
3028
3029   cost = seq_cost (seq, speed);
3030   if (MEM_P (rslt))
3031     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3032                           TYPE_ADDR_SPACE (type), speed);
3033   else if (!REG_P (rslt))
3034     cost += set_src_cost (rslt, speed);
3035
3036   return cost;
3037 }
3038
3039 /* Returns variable containing the value of candidate CAND at statement AT.  */
3040
3041 static tree
3042 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3043 {
3044   if (stmt_after_increment (loop, cand, stmt))
3045     return cand->var_after;
3046   else
3047     return cand->var_before;
3048 }
3049
3050 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3051    same precision that is at least as wide as the precision of TYPE, stores
3052    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3053    type of A and B.  */
3054
3055 static tree
3056 determine_common_wider_type (tree *a, tree *b)
3057 {
3058   tree wider_type = NULL;
3059   tree suba, subb;
3060   tree atype = TREE_TYPE (*a);
3061
3062   if (CONVERT_EXPR_P (*a))
3063     {
3064       suba = TREE_OPERAND (*a, 0);
3065       wider_type = TREE_TYPE (suba);
3066       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3067         return atype;
3068     }
3069   else
3070     return atype;
3071
3072   if (CONVERT_EXPR_P (*b))
3073     {
3074       subb = TREE_OPERAND (*b, 0);
3075       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3076         return atype;
3077     }
3078   else
3079     return atype;
3080
3081   *a = suba;
3082   *b = subb;
3083   return wider_type;
3084 }
3085
3086 /* Determines the expression by that USE is expressed from induction variable
3087    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3088    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3089
3090 static bool
3091 get_computation_aff (struct loop *loop,
3092                      struct iv_use *use, struct iv_cand *cand, gimple at,
3093                      struct aff_tree *aff)
3094 {
3095   tree ubase = use->iv->base;
3096   tree ustep = use->iv->step;
3097   tree cbase = cand->iv->base;
3098   tree cstep = cand->iv->step, cstep_common;
3099   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3100   tree common_type, var;
3101   tree uutype;
3102   aff_tree cbase_aff, var_aff;
3103   widest_int rat;
3104
3105   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3106     {
3107       /* We do not have a precision to express the values of use.  */
3108       return false;
3109     }
3110
3111   var = var_at_stmt (loop, cand, at);
3112   uutype = unsigned_type_for (utype);
3113
3114   /* If the conversion is not noop, perform it.  */
3115   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3116     {
3117       cstep = fold_convert (uutype, cstep);
3118       cbase = fold_convert (uutype, cbase);
3119       var = fold_convert (uutype, var);
3120     }
3121
3122   if (!constant_multiple_of (ustep, cstep, &rat))
3123     return false;
3124
3125   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3126      type, we achieve better folding by computing their difference in this
3127      wider type, and cast the result to UUTYPE.  We do not need to worry about
3128      overflows, as all the arithmetics will in the end be performed in UUTYPE
3129      anyway.  */
3130   common_type = determine_common_wider_type (&ubase, &cbase);
3131
3132   /* use = ubase - ratio * cbase + ratio * var.  */
3133   tree_to_aff_combination (ubase, common_type, aff);
3134   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3135   tree_to_aff_combination (var, uutype, &var_aff);
3136
3137   /* We need to shift the value if we are after the increment.  */
3138   if (stmt_after_increment (loop, cand, at))
3139     {
3140       aff_tree cstep_aff;
3141
3142       if (common_type != uutype)
3143         cstep_common = fold_convert (common_type, cstep);
3144       else
3145         cstep_common = cstep;
3146
3147       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3148       aff_combination_add (&cbase_aff, &cstep_aff);
3149     }
3150
3151   aff_combination_scale (&cbase_aff, -rat);
3152   aff_combination_add (aff, &cbase_aff);
3153   if (common_type != uutype)
3154     aff_combination_convert (aff, uutype);
3155
3156   aff_combination_scale (&var_aff, rat);
3157   aff_combination_add (aff, &var_aff);
3158
3159   return true;
3160 }
3161
3162 /* Return the type of USE.  */
3163
3164 static tree
3165 get_use_type (struct iv_use *use)
3166 {
3167   tree base_type = TREE_TYPE (use->iv->base);
3168   tree type;
3169
3170   if (use->type == USE_ADDRESS)
3171     {
3172       /* The base_type may be a void pointer.  Create a pointer type based on
3173          the mem_ref instead.  */
3174       type = build_pointer_type (TREE_TYPE (*use->op_p));
3175       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3176                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3177     }
3178   else
3179     type = base_type;
3180
3181   return type;
3182 }
3183
3184 /* Determines the expression by that USE is expressed from induction variable
3185    CAND at statement AT in LOOP.  The computation is unshared.  */
3186
3187 static tree
3188 get_computation_at (struct loop *loop,
3189                     struct iv_use *use, struct iv_cand *cand, gimple at)
3190 {
3191   aff_tree aff;
3192   tree type = get_use_type (use);
3193
3194   if (!get_computation_aff (loop, use, cand, at, &aff))
3195     return NULL_TREE;
3196   unshare_aff_combination (&aff);
3197   return fold_convert (type, aff_combination_to_tree (&aff));
3198 }
3199
3200 /* Determines the expression by that USE is expressed from induction variable
3201    CAND in LOOP.  The computation is unshared.  */
3202
3203 static tree
3204 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3205 {
3206   return get_computation_at (loop, use, cand, use->stmt);
3207 }
3208
3209 /* Adjust the cost COST for being in loop setup rather than loop body.
3210    If we're optimizing for space, the loop setup overhead is constant;
3211    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3212 static unsigned
3213 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3214 {
3215   if (cost == INFTY)
3216     return cost;
3217   else if (optimize_loop_for_speed_p (data->current_loop))
3218     return cost / avg_loop_niter (data->current_loop);
3219   else
3220     return cost;
3221 }
3222
3223 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3224    validity for a memory reference accessing memory of mode MODE in
3225    address space AS.  */
3226
3227
3228 bool
3229 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3230                                  addr_space_t as)
3231 {
3232 #define MAX_RATIO 128
3233   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3234   static vec<sbitmap> valid_mult_list;
3235   sbitmap valid_mult;
3236
3237   if (data_index >= valid_mult_list.length ())
3238     valid_mult_list.safe_grow_cleared (data_index + 1);
3239
3240   valid_mult = valid_mult_list[data_index];
3241   if (!valid_mult)
3242     {
3243       machine_mode address_mode = targetm.addr_space.address_mode (as);
3244       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3245       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3246       rtx addr, scaled;
3247       HOST_WIDE_INT i;
3248
3249       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3250       bitmap_clear (valid_mult);
3251       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3252       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3253       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3254         {
3255           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3256           if (memory_address_addr_space_p (mode, addr, as)
3257               || memory_address_addr_space_p (mode, scaled, as))
3258             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3259         }
3260
3261       if (dump_file && (dump_flags & TDF_DETAILS))
3262         {
3263           fprintf (dump_file, "  allowed multipliers:");
3264           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3265             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3266               fprintf (dump_file, " %d", (int) i);
3267           fprintf (dump_file, "\n");
3268           fprintf (dump_file, "\n");
3269         }
3270
3271       valid_mult_list[data_index] = valid_mult;
3272     }
3273
3274   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3275     return false;
3276
3277   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3278 }
3279
3280 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3281    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3282    variable is omitted.  Compute the cost for a memory reference that accesses
3283    a memory location of mode MEM_MODE in address space AS.
3284
3285    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3286    size of MEM_MODE / RATIO) is available.  To make this determination, we
3287    look at the size of the increment to be made, which is given in CSTEP.
3288    CSTEP may be zero if the step is unknown.
3289    STMT_AFTER_INC is true iff the statement we're looking at is after the
3290    increment of the original biv.
3291
3292    TODO -- there must be some better way.  This all is quite crude.  */
3293
3294 enum ainc_type
3295 {
3296   AINC_PRE_INC,         /* Pre increment.  */
3297   AINC_PRE_DEC,         /* Pre decrement.  */
3298   AINC_POST_INC,        /* Post increment.  */
3299   AINC_POST_DEC,        /* Post decrement.  */
3300   AINC_NONE             /* Also the number of auto increment types.  */
3301 };
3302
3303 typedef struct address_cost_data_s
3304 {
3305   HOST_WIDE_INT min_offset, max_offset;
3306   unsigned costs[2][2][2][2];
3307   unsigned ainc_costs[AINC_NONE];
3308 } *address_cost_data;
3309
3310
3311 static comp_cost
3312 get_address_cost (bool symbol_present, bool var_present,
3313                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3314                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3315                   addr_space_t as, bool speed,
3316                   bool stmt_after_inc, bool *may_autoinc)
3317 {
3318   machine_mode address_mode = targetm.addr_space.address_mode (as);
3319   static vec<address_cost_data> address_cost_data_list;
3320   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3321   address_cost_data data;
3322   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3323   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3324   unsigned cost, acost, complexity;
3325   enum ainc_type autoinc_type;
3326   bool offset_p, ratio_p, autoinc;
3327   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3328   unsigned HOST_WIDE_INT mask;
3329   unsigned bits;
3330
3331   if (data_index >= address_cost_data_list.length ())
3332     address_cost_data_list.safe_grow_cleared (data_index + 1);
3333
3334   data = address_cost_data_list[data_index];
3335   if (!data)
3336     {
3337       HOST_WIDE_INT i;
3338       HOST_WIDE_INT rat, off = 0;
3339       int old_cse_not_expected, width;
3340       unsigned sym_p, var_p, off_p, rat_p, add_c;
3341       rtx_insn *seq;
3342       rtx addr, base;
3343       rtx reg0, reg1;
3344
3345       data = (address_cost_data) xcalloc (1, sizeof (*data));
3346
3347       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3348
3349       width = GET_MODE_BITSIZE (address_mode) - 1;
3350       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3351         width = HOST_BITS_PER_WIDE_INT - 1;
3352       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3353
3354       for (i = width; i >= 0; i--)
3355         {
3356           off = -((unsigned HOST_WIDE_INT) 1 << i);
3357           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3358           if (memory_address_addr_space_p (mem_mode, addr, as))
3359             break;
3360         }
3361       data->min_offset = (i == -1? 0 : off);
3362
3363       for (i = width; i >= 0; i--)
3364         {
3365           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3366           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3367           if (memory_address_addr_space_p (mem_mode, addr, as))
3368             break;
3369           /* For some strict-alignment targets, the offset must be naturally
3370              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3371           off = mem_mode != QImode
3372                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3373                     - GET_MODE_SIZE (mem_mode)
3374                 : 0;
3375           if (off > 0)
3376             {
3377               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3378               if (memory_address_addr_space_p (mem_mode, addr, as))
3379                 break;
3380             }
3381         }
3382       if (i == -1)
3383         off = 0;
3384       data->max_offset = off;
3385
3386       if (dump_file && (dump_flags & TDF_DETAILS))
3387         {
3388           fprintf (dump_file, "get_address_cost:\n");
3389           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3390                    GET_MODE_NAME (mem_mode),
3391                    data->min_offset);
3392           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3393                    GET_MODE_NAME (mem_mode),
3394                    data->max_offset);
3395         }
3396
3397       rat = 1;
3398       for (i = 2; i <= MAX_RATIO; i++)
3399         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3400           {
3401             rat = i;
3402             break;
3403           }
3404
3405       /* Compute the cost of various addressing modes.  */
3406       acost = 0;
3407       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3408       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3409
3410       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3411           || USE_STORE_PRE_DECREMENT (mem_mode))
3412         {
3413           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3414           has_predec[mem_mode]
3415             = memory_address_addr_space_p (mem_mode, addr, as);
3416
3417           if (has_predec[mem_mode])
3418             data->ainc_costs[AINC_PRE_DEC]
3419               = address_cost (addr, mem_mode, as, speed);
3420         }
3421       if (USE_LOAD_POST_DECREMENT (mem_mode)
3422           || USE_STORE_POST_DECREMENT (mem_mode))
3423         {
3424           addr = gen_rtx_POST_DEC (address_mode, reg0);
3425           has_postdec[mem_mode]
3426             = memory_address_addr_space_p (mem_mode, addr, as);
3427
3428           if (has_postdec[mem_mode])
3429             data->ainc_costs[AINC_POST_DEC]
3430               = address_cost (addr, mem_mode, as, speed);
3431         }
3432       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3433           || USE_STORE_PRE_DECREMENT (mem_mode))
3434         {
3435           addr = gen_rtx_PRE_INC (address_mode, reg0);
3436           has_preinc[mem_mode]
3437             = memory_address_addr_space_p (mem_mode, addr, as);
3438
3439           if (has_preinc[mem_mode])
3440             data->ainc_costs[AINC_PRE_INC]
3441               = address_cost (addr, mem_mode, as, speed);
3442         }
3443       if (USE_LOAD_POST_INCREMENT (mem_mode)
3444           || USE_STORE_POST_INCREMENT (mem_mode))
3445         {
3446           addr = gen_rtx_POST_INC (address_mode, reg0);
3447           has_postinc[mem_mode]
3448             = memory_address_addr_space_p (mem_mode, addr, as);
3449
3450           if (has_postinc[mem_mode])
3451             data->ainc_costs[AINC_POST_INC]
3452               = address_cost (addr, mem_mode, as, speed);
3453         }
3454       for (i = 0; i < 16; i++)
3455         {
3456           sym_p = i & 1;
3457           var_p = (i >> 1) & 1;
3458           off_p = (i >> 2) & 1;
3459           rat_p = (i >> 3) & 1;
3460
3461           addr = reg0;
3462           if (rat_p)
3463             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3464                                    gen_int_mode (rat, address_mode));
3465
3466           if (var_p)
3467             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3468
3469           if (sym_p)
3470             {
3471               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3472               /* ??? We can run into trouble with some backends by presenting
3473                  it with symbols which haven't been properly passed through
3474                  targetm.encode_section_info.  By setting the local bit, we
3475                  enhance the probability of things working.  */
3476               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3477
3478               if (off_p)
3479                 base = gen_rtx_fmt_e (CONST, address_mode,
3480                                       gen_rtx_fmt_ee
3481                                         (PLUS, address_mode, base,
3482                                          gen_int_mode (off, address_mode)));
3483             }
3484           else if (off_p)
3485             base = gen_int_mode (off, address_mode);
3486           else
3487             base = NULL_RTX;
3488
3489           if (base)
3490             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3491
3492           start_sequence ();
3493           /* To avoid splitting addressing modes, pretend that no cse will
3494              follow.  */
3495           old_cse_not_expected = cse_not_expected;
3496           cse_not_expected = true;
3497           addr = memory_address_addr_space (mem_mode, addr, as);
3498           cse_not_expected = old_cse_not_expected;
3499           seq = get_insns ();
3500           end_sequence ();
3501
3502           acost = seq_cost (seq, speed);
3503           acost += address_cost (addr, mem_mode, as, speed);
3504
3505           if (!acost)
3506             acost = 1;
3507           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3508         }
3509
3510       /* On some targets, it is quite expensive to load symbol to a register,
3511          which makes addresses that contain symbols look much more expensive.
3512          However, the symbol will have to be loaded in any case before the
3513          loop (and quite likely we have it in register already), so it does not
3514          make much sense to penalize them too heavily.  So make some final
3515          tweaks for the SYMBOL_PRESENT modes:
3516
3517          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3518          var is cheaper, use this mode with small penalty.
3519          If VAR_PRESENT is true, try whether the mode with
3520          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3521          if this is the case, use it.  */
3522       add_c = add_cost (speed, address_mode);
3523       for (i = 0; i < 8; i++)
3524         {
3525           var_p = i & 1;
3526           off_p = (i >> 1) & 1;
3527           rat_p = (i >> 2) & 1;
3528
3529           acost = data->costs[0][1][off_p][rat_p] + 1;
3530           if (var_p)
3531             acost += add_c;
3532
3533           if (acost < data->costs[1][var_p][off_p][rat_p])
3534             data->costs[1][var_p][off_p][rat_p] = acost;
3535         }
3536
3537       if (dump_file && (dump_flags & TDF_DETAILS))
3538         {
3539           fprintf (dump_file, "Address costs:\n");
3540
3541           for (i = 0; i < 16; i++)
3542             {
3543               sym_p = i & 1;
3544               var_p = (i >> 1) & 1;
3545               off_p = (i >> 2) & 1;
3546               rat_p = (i >> 3) & 1;
3547
3548               fprintf (dump_file, "  ");
3549               if (sym_p)
3550                 fprintf (dump_file, "sym + ");
3551               if (var_p)
3552                 fprintf (dump_file, "var + ");
3553               if (off_p)
3554                 fprintf (dump_file, "cst + ");
3555               if (rat_p)
3556                 fprintf (dump_file, "rat * ");
3557
3558               acost = data->costs[sym_p][var_p][off_p][rat_p];
3559               fprintf (dump_file, "index costs %d\n", acost);
3560             }
3561           if (has_predec[mem_mode] || has_postdec[mem_mode]
3562               || has_preinc[mem_mode] || has_postinc[mem_mode])
3563             fprintf (dump_file, "  May include autoinc/dec\n");
3564           fprintf (dump_file, "\n");
3565         }
3566
3567       address_cost_data_list[data_index] = data;
3568     }
3569
3570   bits = GET_MODE_BITSIZE (address_mode);
3571   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3572   offset &= mask;
3573   if ((offset >> (bits - 1) & 1))
3574     offset |= ~mask;
3575   s_offset = offset;
3576
3577   autoinc = false;
3578   autoinc_type = AINC_NONE;
3579   msize = GET_MODE_SIZE (mem_mode);
3580   autoinc_offset = offset;
3581   if (stmt_after_inc)
3582     autoinc_offset += ratio * cstep;
3583   if (symbol_present || var_present || ratio != 1)
3584     autoinc = false;
3585   else
3586     {
3587       if (has_postinc[mem_mode] && autoinc_offset == 0
3588           && msize == cstep)
3589         autoinc_type = AINC_POST_INC;
3590       else if (has_postdec[mem_mode] && autoinc_offset == 0
3591                && msize == -cstep)
3592         autoinc_type = AINC_POST_DEC;
3593       else if (has_preinc[mem_mode] && autoinc_offset == msize
3594                && msize == cstep)
3595         autoinc_type = AINC_PRE_INC;
3596       else if (has_predec[mem_mode] && autoinc_offset == -msize
3597                && msize == -cstep)
3598         autoinc_type = AINC_PRE_DEC;
3599
3600       if (autoinc_type != AINC_NONE)
3601         autoinc = true;
3602     }
3603
3604   cost = 0;
3605   offset_p = (s_offset != 0
3606               && data->min_offset <= s_offset
3607               && s_offset <= data->max_offset);
3608   ratio_p = (ratio != 1
3609              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3610
3611   if (ratio != 1 && !ratio_p)
3612     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3613
3614   if (s_offset && !offset_p && !symbol_present)
3615     cost += add_cost (speed, address_mode);
3616
3617   if (may_autoinc)
3618     *may_autoinc = autoinc;
3619   if (autoinc)
3620     acost = data->ainc_costs[autoinc_type];
3621   else
3622     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3623   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3624   return new_cost (cost + acost, complexity);
3625 }
3626
3627  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3628     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3629     calculating the operands of EXPR.  Returns true if successful, and returns
3630     the cost in COST.  */
3631
3632 static bool
3633 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3634                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3635 {
3636   comp_cost res;
3637   tree op1 = TREE_OPERAND (expr, 1);
3638   tree cst = TREE_OPERAND (mult, 1);
3639   tree multop = TREE_OPERAND (mult, 0);
3640   int m = exact_log2 (int_cst_value (cst));
3641   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3642   int as_cost, sa_cost;
3643   bool mult_in_op1;
3644
3645   if (!(m >= 0 && m < maxm))
3646     return false;
3647
3648   mult_in_op1 = operand_equal_p (op1, mult, 0);
3649
3650   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3651
3652   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3653      use that in preference to a shift insn followed by an add insn.  */
3654   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3655              ? shiftadd_cost (speed, mode, m)
3656              : (mult_in_op1
3657                 ? shiftsub1_cost (speed, mode, m)
3658                 : shiftsub0_cost (speed, mode, m)));
3659
3660   res = new_cost (MIN (as_cost, sa_cost), 0);
3661   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3662
3663   STRIP_NOPS (multop);
3664   if (!is_gimple_val (multop))
3665     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3666
3667   *cost = res;
3668   return true;
3669 }
3670
3671 /* Estimates cost of forcing expression EXPR into a variable.  */
3672
3673 static comp_cost
3674 force_expr_to_var_cost (tree expr, bool speed)
3675 {
3676   static bool costs_initialized = false;
3677   static unsigned integer_cost [2];
3678   static unsigned symbol_cost [2];
3679   static unsigned address_cost [2];
3680   tree op0, op1;
3681   comp_cost cost0, cost1, cost;
3682   machine_mode mode;
3683
3684   if (!costs_initialized)
3685     {
3686       tree type = build_pointer_type (integer_type_node);
3687       tree var, addr;
3688       rtx x;
3689       int i;
3690
3691       var = create_tmp_var_raw (integer_type_node, "test_var");
3692       TREE_STATIC (var) = 1;
3693       x = produce_memory_decl_rtl (var, NULL);
3694       SET_DECL_RTL (var, x);
3695
3696       addr = build1 (ADDR_EXPR, type, var);
3697
3698
3699       for (i = 0; i < 2; i++)
3700         {
3701           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3702                                                              2000), i);
3703
3704           symbol_cost[i] = computation_cost (addr, i) + 1;
3705
3706           address_cost[i]
3707             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3708           if (dump_file && (dump_flags & TDF_DETAILS))
3709             {
3710               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3711               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3712               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3713               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3714               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3715               fprintf (dump_file, "\n");
3716             }
3717         }
3718
3719       costs_initialized = true;
3720     }
3721
3722   STRIP_NOPS (expr);
3723
3724   if (SSA_VAR_P (expr))
3725     return no_cost;
3726
3727   if (is_gimple_min_invariant (expr))
3728     {
3729       if (TREE_CODE (expr) == INTEGER_CST)
3730         return new_cost (integer_cost [speed], 0);
3731
3732       if (TREE_CODE (expr) == ADDR_EXPR)
3733         {
3734           tree obj = TREE_OPERAND (expr, 0);
3735
3736           if (TREE_CODE (obj) == VAR_DECL
3737               || TREE_CODE (obj) == PARM_DECL
3738               || TREE_CODE (obj) == RESULT_DECL)
3739             return new_cost (symbol_cost [speed], 0);
3740         }
3741
3742       return new_cost (address_cost [speed], 0);
3743     }
3744
3745   switch (TREE_CODE (expr))
3746     {
3747     case POINTER_PLUS_EXPR:
3748     case PLUS_EXPR:
3749     case MINUS_EXPR:
3750     case MULT_EXPR:
3751       op0 = TREE_OPERAND (expr, 0);
3752       op1 = TREE_OPERAND (expr, 1);
3753       STRIP_NOPS (op0);
3754       STRIP_NOPS (op1);
3755       break;
3756
3757     CASE_CONVERT:
3758     case NEGATE_EXPR:
3759       op0 = TREE_OPERAND (expr, 0);
3760       STRIP_NOPS (op0);
3761       op1 = NULL_TREE;
3762       break;
3763
3764     default:
3765       /* Just an arbitrary value, FIXME.  */
3766       return new_cost (target_spill_cost[speed], 0);
3767     }
3768
3769   if (op0 == NULL_TREE
3770       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3771     cost0 = no_cost;
3772   else
3773     cost0 = force_expr_to_var_cost (op0, speed);
3774
3775   if (op1 == NULL_TREE
3776       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3777     cost1 = no_cost;
3778   else
3779     cost1 = force_expr_to_var_cost (op1, speed);
3780
3781   mode = TYPE_MODE (TREE_TYPE (expr));
3782   switch (TREE_CODE (expr))
3783     {
3784     case POINTER_PLUS_EXPR:
3785     case PLUS_EXPR:
3786     case MINUS_EXPR:
3787     case NEGATE_EXPR:
3788       cost = new_cost (add_cost (speed, mode), 0);
3789       if (TREE_CODE (expr) != NEGATE_EXPR)
3790         {
3791           tree mult = NULL_TREE;
3792           comp_cost sa_cost;
3793           if (TREE_CODE (op1) == MULT_EXPR)
3794             mult = op1;
3795           else if (TREE_CODE (op0) == MULT_EXPR)
3796             mult = op0;
3797
3798           if (mult != NULL_TREE
3799               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3800               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3801                                     speed, &sa_cost))
3802             return sa_cost;
3803         }
3804       break;
3805
3806     CASE_CONVERT:
3807       {
3808         tree inner_mode, outer_mode;
3809         outer_mode = TREE_TYPE (expr);
3810         inner_mode = TREE_TYPE (op0);
3811         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3812                                        TYPE_MODE (inner_mode), speed), 0);
3813       }
3814       break;
3815
3816     case MULT_EXPR:
3817       if (cst_and_fits_in_hwi (op0))
3818         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3819                                              mode, speed), 0);
3820       else if (cst_and_fits_in_hwi (op1))
3821         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3822                                              mode, speed), 0);
3823       else
3824         return new_cost (target_spill_cost [speed], 0);
3825       break;
3826
3827     default:
3828       gcc_unreachable ();
3829     }
3830
3831   cost = add_costs (cost, cost0);
3832   cost = add_costs (cost, cost1);
3833
3834   /* Bound the cost by target_spill_cost.  The parts of complicated
3835      computations often are either loop invariant or at least can
3836      be shared between several iv uses, so letting this grow without
3837      limits would not give reasonable results.  */
3838   if (cost.cost > (int) target_spill_cost [speed])
3839     cost.cost = target_spill_cost [speed];
3840
3841   return cost;
3842 }
3843
3844 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3845    invariants the computation depends on.  */
3846
3847 static comp_cost
3848 force_var_cost (struct ivopts_data *data,
3849                 tree expr, bitmap *depends_on)
3850 {
3851   if (depends_on)
3852     {
3853       fd_ivopts_data = data;
3854       walk_tree (&expr, find_depends, depends_on, NULL);
3855     }
3856
3857   return force_expr_to_var_cost (expr, data->speed);
3858 }
3859
3860 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3861    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3862    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3863    invariants the computation depends on.  */
3864
3865 static comp_cost
3866 split_address_cost (struct ivopts_data *data,
3867                     tree addr, bool *symbol_present, bool *var_present,
3868                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3869 {
3870   tree core;
3871   HOST_WIDE_INT bitsize;
3872   HOST_WIDE_INT bitpos;
3873   tree toffset;
3874   machine_mode mode;
3875   int unsignedp, volatilep;
3876
3877   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3878                               &unsignedp, &volatilep, false);
3879
3880   if (toffset != 0
3881       || bitpos % BITS_PER_UNIT != 0
3882       || TREE_CODE (core) != VAR_DECL)
3883     {
3884       *symbol_present = false;
3885       *var_present = true;
3886       fd_ivopts_data = data;
3887       walk_tree (&addr, find_depends, depends_on, NULL);
3888       return new_cost (target_spill_cost[data->speed], 0);
3889     }
3890
3891   *offset += bitpos / BITS_PER_UNIT;
3892   if (TREE_STATIC (core)
3893       || DECL_EXTERNAL (core))
3894     {
3895       *symbol_present = true;
3896       *var_present = false;
3897       return no_cost;
3898     }
3899
3900   *symbol_present = false;
3901   *var_present = true;
3902   return no_cost;
3903 }
3904
3905 /* Estimates cost of expressing difference of addresses E1 - E2 as
3906    var + symbol + offset.  The value of offset is added to OFFSET,
3907    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3908    part is missing.  DEPENDS_ON is a set of the invariants the computation
3909    depends on.  */
3910
3911 static comp_cost
3912 ptr_difference_cost (struct ivopts_data *data,
3913                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3914                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3915 {
3916   HOST_WIDE_INT diff = 0;
3917   aff_tree aff_e1, aff_e2;
3918   tree type;
3919
3920   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3921
3922   if (ptr_difference_const (e1, e2, &diff))
3923     {
3924       *offset += diff;
3925       *symbol_present = false;
3926       *var_present = false;
3927       return no_cost;
3928     }
3929
3930   if (integer_zerop (e2))
3931     return split_address_cost (data, TREE_OPERAND (e1, 0),
3932                                symbol_present, var_present, offset, depends_on);
3933
3934   *symbol_present = false;
3935   *var_present = true;
3936
3937   type = signed_type_for (TREE_TYPE (e1));
3938   tree_to_aff_combination (e1, type, &aff_e1);
3939   tree_to_aff_combination (e2, type, &aff_e2);
3940   aff_combination_scale (&aff_e2, -1);
3941   aff_combination_add (&aff_e1, &aff_e2);
3942
3943   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3944 }
3945
3946 /* Estimates cost of expressing difference E1 - E2 as
3947    var + symbol + offset.  The value of offset is added to OFFSET,
3948    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3949    part is missing.  DEPENDS_ON is a set of the invariants the computation
3950    depends on.  */
3951
3952 static comp_cost
3953 difference_cost (struct ivopts_data *data,
3954                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3955                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3956 {
3957   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3958   unsigned HOST_WIDE_INT off1, off2;
3959   aff_tree aff_e1, aff_e2;
3960   tree type;
3961
3962   e1 = strip_offset (e1, &off1);
3963   e2 = strip_offset (e2, &off2);
3964   *offset += off1 - off2;
3965
3966   STRIP_NOPS (e1);
3967   STRIP_NOPS (e2);
3968
3969   if (TREE_CODE (e1) == ADDR_EXPR)
3970     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3971                                 offset, depends_on);
3972   *symbol_present = false;
3973
3974   if (operand_equal_p (e1, e2, 0))
3975     {
3976       *var_present = false;
3977       return no_cost;
3978     }
3979
3980   *var_present = true;
3981
3982   if (integer_zerop (e2))
3983     return force_var_cost (data, e1, depends_on);
3984
3985   if (integer_zerop (e1))
3986     {
3987       comp_cost cost = force_var_cost (data, e2, depends_on);
3988       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3989       return cost;
3990     }
3991
3992   type = signed_type_for (TREE_TYPE (e1));
3993   tree_to_aff_combination (e1, type, &aff_e1);
3994   tree_to_aff_combination (e2, type, &aff_e2);
3995   aff_combination_scale (&aff_e2, -1);
3996   aff_combination_add (&aff_e1, &aff_e2);
3997
3998   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3999 }
4000
4001 /* Returns true if AFF1 and AFF2 are identical.  */
4002
4003 static bool
4004 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4005 {
4006   unsigned i;
4007
4008   if (aff1->n != aff2->n)
4009     return false;
4010
4011   for (i = 0; i < aff1->n; i++)
4012     {
4013       if (aff1->elts[i].coef != aff2->elts[i].coef)
4014         return false;
4015
4016       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4017         return false;
4018     }
4019   return true;
4020 }
4021
4022 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4023
4024 static int
4025 get_expr_id (struct ivopts_data *data, tree expr)
4026 {
4027   struct iv_inv_expr_ent ent;
4028   struct iv_inv_expr_ent **slot;
4029
4030   ent.expr = expr;
4031   ent.hash = iterative_hash_expr (expr, 0);
4032   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4033   if (*slot)
4034     return (*slot)->id;
4035
4036   *slot = XNEW (struct iv_inv_expr_ent);
4037   (*slot)->expr = expr;
4038   (*slot)->hash = ent.hash;
4039   (*slot)->id = data->inv_expr_id++;
4040   return (*slot)->id;
4041 }
4042
4043 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4044    requires a new compiler generated temporary.  Returns -1 otherwise.
4045    ADDRESS_P is a flag indicating if the expression is for address
4046    computation.  */
4047
4048 static int
4049 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4050                             tree cbase, HOST_WIDE_INT ratio,
4051                             bool address_p)
4052 {
4053   aff_tree ubase_aff, cbase_aff;
4054   tree expr, ub, cb;
4055
4056   STRIP_NOPS (ubase);
4057   STRIP_NOPS (cbase);
4058   ub = ubase;
4059   cb = cbase;
4060
4061   if ((TREE_CODE (ubase) == INTEGER_CST)
4062       && (TREE_CODE (cbase) == INTEGER_CST))
4063     return -1;
4064
4065   /* Strips the constant part. */
4066   if (TREE_CODE (ubase) == PLUS_EXPR
4067       || TREE_CODE (ubase) == MINUS_EXPR
4068       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4069     {
4070       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4071         ubase = TREE_OPERAND (ubase, 0);
4072     }
4073
4074   /* Strips the constant part. */
4075   if (TREE_CODE (cbase) == PLUS_EXPR
4076       || TREE_CODE (cbase) == MINUS_EXPR
4077       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4078     {
4079       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4080         cbase = TREE_OPERAND (cbase, 0);
4081     }
4082
4083   if (address_p)
4084     {
4085       if (((TREE_CODE (ubase) == SSA_NAME)
4086            || (TREE_CODE (ubase) == ADDR_EXPR
4087                && is_gimple_min_invariant (ubase)))
4088           && (TREE_CODE (cbase) == INTEGER_CST))
4089         return -1;
4090
4091       if (((TREE_CODE (cbase) == SSA_NAME)
4092            || (TREE_CODE (cbase) == ADDR_EXPR
4093                && is_gimple_min_invariant (cbase)))
4094           && (TREE_CODE (ubase) == INTEGER_CST))
4095         return -1;
4096     }
4097
4098   if (ratio == 1)
4099     {
4100       if (operand_equal_p (ubase, cbase, 0))
4101         return -1;
4102
4103       if (TREE_CODE (ubase) == ADDR_EXPR
4104           && TREE_CODE (cbase) == ADDR_EXPR)
4105         {
4106           tree usym, csym;
4107
4108           usym = TREE_OPERAND (ubase, 0);
4109           csym = TREE_OPERAND (cbase, 0);
4110           if (TREE_CODE (usym) == ARRAY_REF)
4111             {
4112               tree ind = TREE_OPERAND (usym, 1);
4113               if (TREE_CODE (ind) == INTEGER_CST
4114                   && tree_fits_shwi_p (ind)
4115                   && tree_to_shwi (ind) == 0)
4116                 usym = TREE_OPERAND (usym, 0);
4117             }
4118           if (TREE_CODE (csym) == ARRAY_REF)
4119             {
4120               tree ind = TREE_OPERAND (csym, 1);
4121               if (TREE_CODE (ind) == INTEGER_CST
4122                   && tree_fits_shwi_p (ind)
4123                   && tree_to_shwi (ind) == 0)
4124                 csym = TREE_OPERAND (csym, 0);
4125             }
4126           if (operand_equal_p (usym, csym, 0))
4127             return -1;
4128         }
4129       /* Now do more complex comparison  */
4130       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4131       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4132       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4133         return -1;
4134     }
4135
4136   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4137   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4138
4139   aff_combination_scale (&cbase_aff, -1 * ratio);
4140   aff_combination_add (&ubase_aff, &cbase_aff);
4141   expr = aff_combination_to_tree (&ubase_aff);
4142   return get_expr_id (data, expr);
4143 }
4144
4145
4146
4147 /* Determines the cost of the computation by that USE is expressed
4148    from induction variable CAND.  If ADDRESS_P is true, we just need
4149    to create an address from it, otherwise we want to get it into
4150    register.  A set of invariants we depend on is stored in
4151    DEPENDS_ON.  AT is the statement at that the value is computed.
4152    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4153    addressing is likely.  */
4154
4155 static comp_cost
4156 get_computation_cost_at (struct ivopts_data *data,
4157                          struct iv_use *use, struct iv_cand *cand,
4158                          bool address_p, bitmap *depends_on, gimple at,
4159                          bool *can_autoinc,
4160                          int *inv_expr_id)
4161 {
4162   tree ubase = use->iv->base, ustep = use->iv->step;
4163   tree cbase, cstep;
4164   tree utype = TREE_TYPE (ubase), ctype;
4165   unsigned HOST_WIDE_INT cstepi, offset = 0;
4166   HOST_WIDE_INT ratio, aratio;
4167   bool var_present, symbol_present, stmt_is_after_inc;
4168   comp_cost cost;
4169   widest_int rat;
4170   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4171   machine_mode mem_mode = (address_p
4172                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4173                                 : VOIDmode);
4174
4175   *depends_on = NULL;
4176
4177   /* Only consider real candidates.  */
4178   if (!cand->iv)
4179     return infinite_cost;
4180
4181   cbase = cand->iv->base;
4182   cstep = cand->iv->step;
4183   ctype = TREE_TYPE (cbase);
4184
4185   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4186     {
4187       /* We do not have a precision to express the values of use.  */
4188       return infinite_cost;
4189     }
4190
4191   if (address_p
4192       || (use->iv->base_object
4193           && cand->iv->base_object
4194           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4195           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4196     {
4197       /* Do not try to express address of an object with computation based
4198          on address of a different object.  This may cause problems in rtl
4199          level alias analysis (that does not expect this to be happening,
4200          as this is illegal in C), and would be unlikely to be useful
4201          anyway.  */
4202       if (use->iv->base_object
4203           && cand->iv->base_object
4204           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4205         return infinite_cost;
4206     }
4207
4208   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4209     {
4210       /* TODO -- add direct handling of this case.  */
4211       goto fallback;
4212     }
4213
4214   /* CSTEPI is removed from the offset in case statement is after the
4215      increment.  If the step is not constant, we use zero instead.
4216      This is a bit imprecise (there is the extra addition), but
4217      redundancy elimination is likely to transform the code so that
4218      it uses value of the variable before increment anyway,
4219      so it is not that much unrealistic.  */
4220   if (cst_and_fits_in_hwi (cstep))
4221     cstepi = int_cst_value (cstep);
4222   else
4223     cstepi = 0;
4224
4225   if (!constant_multiple_of (ustep, cstep, &rat))
4226     return infinite_cost;
4227
4228   if (wi::fits_shwi_p (rat))
4229     ratio = rat.to_shwi ();
4230   else
4231     return infinite_cost;
4232
4233   STRIP_NOPS (cbase);
4234   ctype = TREE_TYPE (cbase);
4235
4236   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4237
4238   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4239      or ratio == 1, it is better to handle this like
4240
4241      ubase - ratio * cbase + ratio * var
4242
4243      (also holds in the case ratio == -1, TODO.  */
4244
4245   if (cst_and_fits_in_hwi (cbase))
4246     {
4247       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4248       cost = difference_cost (data,
4249                               ubase, build_int_cst (utype, 0),
4250                               &symbol_present, &var_present, &offset,
4251                               depends_on);
4252       cost.cost /= avg_loop_niter (data->current_loop);
4253     }
4254   else if (ratio == 1)
4255     {
4256       tree real_cbase = cbase;
4257
4258       /* Check to see if any adjustment is needed.  */
4259       if (cstepi == 0 && stmt_is_after_inc)
4260         {
4261           aff_tree real_cbase_aff;
4262           aff_tree cstep_aff;
4263
4264           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4265                                    &real_cbase_aff);
4266           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4267
4268           aff_combination_add (&real_cbase_aff, &cstep_aff);
4269           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4270         }
4271
4272       cost = difference_cost (data,
4273                               ubase, real_cbase,
4274                               &symbol_present, &var_present, &offset,
4275                               depends_on);
4276       cost.cost /= avg_loop_niter (data->current_loop);
4277     }
4278   else if (address_p
4279            && !POINTER_TYPE_P (ctype)
4280            && multiplier_allowed_in_address_p
4281                 (ratio, mem_mode,
4282                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4283     {
4284       cbase
4285         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4286       cost = difference_cost (data,
4287                               ubase, cbase,
4288                               &symbol_present, &var_present, &offset,
4289                               depends_on);
4290       cost.cost /= avg_loop_niter (data->current_loop);
4291     }
4292   else
4293     {
4294       cost = force_var_cost (data, cbase, depends_on);
4295       cost = add_costs (cost,
4296                         difference_cost (data,
4297                                          ubase, build_int_cst (utype, 0),
4298                                          &symbol_present, &var_present,
4299                                          &offset, depends_on));
4300       cost.cost /= avg_loop_niter (data->current_loop);
4301       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4302     }
4303
4304   if (inv_expr_id)
4305     {
4306       *inv_expr_id =
4307           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4308       /* Clear depends on.  */
4309       if (*inv_expr_id != -1 && depends_on && *depends_on)
4310         bitmap_clear (*depends_on);
4311     }
4312
4313   /* If we are after the increment, the value of the candidate is higher by
4314      one iteration.  */
4315   if (stmt_is_after_inc)
4316     offset -= ratio * cstepi;
4317
4318   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4319      (symbol/var1/const parts may be omitted).  If we are looking for an
4320      address, find the cost of addressing this.  */
4321   if (address_p)
4322     return add_costs (cost,
4323                       get_address_cost (symbol_present, var_present,
4324                                         offset, ratio, cstepi,
4325                                         mem_mode,
4326                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4327                                         speed, stmt_is_after_inc,
4328                                         can_autoinc));
4329
4330   /* Otherwise estimate the costs for computing the expression.  */
4331   if (!symbol_present && !var_present && !offset)
4332     {
4333       if (ratio != 1)
4334         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4335       return cost;
4336     }
4337
4338   /* Symbol + offset should be compile-time computable so consider that they
4339       are added once to the variable, if present.  */
4340   if (var_present && (symbol_present || offset))
4341     cost.cost += adjust_setup_cost (data,
4342                                     add_cost (speed, TYPE_MODE (ctype)));
4343
4344   /* Having offset does not affect runtime cost in case it is added to
4345      symbol, but it increases complexity.  */
4346   if (offset)
4347     cost.complexity++;
4348
4349   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4350
4351   aratio = ratio > 0 ? ratio : -ratio;
4352   if (aratio != 1)
4353     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4354   return cost;
4355
4356 fallback:
4357   if (can_autoinc)
4358     *can_autoinc = false;
4359
4360   {
4361     /* Just get the expression, expand it and measure the cost.  */
4362     tree comp = get_computation_at (data->current_loop, use, cand, at);
4363
4364     if (!comp)
4365       return infinite_cost;
4366
4367     if (address_p)
4368       comp = build_simple_mem_ref (comp);
4369
4370     return new_cost (computation_cost (comp, speed), 0);
4371   }
4372 }
4373
4374 /* Determines the cost of the computation by that USE is expressed
4375    from induction variable CAND.  If ADDRESS_P is true, we just need
4376    to create an address from it, otherwise we want to get it into
4377    register.  A set of invariants we depend on is stored in
4378    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4379    autoinc addressing is likely.  */
4380
4381 static comp_cost
4382 get_computation_cost (struct ivopts_data *data,
4383                       struct iv_use *use, struct iv_cand *cand,
4384                       bool address_p, bitmap *depends_on,
4385                       bool *can_autoinc, int *inv_expr_id)
4386 {
4387   return get_computation_cost_at (data,
4388                                   use, cand, address_p, depends_on, use->stmt,
4389                                   can_autoinc, inv_expr_id);
4390 }
4391
4392 /* Determines cost of basing replacement of USE on CAND in a generic
4393    expression.  */
4394
4395 static bool
4396 determine_use_iv_cost_generic (struct ivopts_data *data,
4397                                struct iv_use *use, struct iv_cand *cand)
4398 {
4399   bitmap depends_on;
4400   comp_cost cost;
4401   int inv_expr_id = -1;
4402
4403   /* The simple case first -- if we need to express value of the preserved
4404      original biv, the cost is 0.  This also prevents us from counting the
4405      cost of increment twice -- once at this use and once in the cost of
4406      the candidate.  */
4407   if (cand->pos == IP_ORIGINAL
4408       && cand->incremented_at == use->stmt)
4409     {
4410       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4411                        ERROR_MARK, -1);
4412       return true;
4413     }
4414
4415   cost = get_computation_cost (data, use, cand, false, &depends_on,
4416                                NULL, &inv_expr_id);
4417
4418   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4419                    inv_expr_id);
4420
4421   return !infinite_cost_p (cost);
4422 }
4423
4424 /* Determines cost of basing replacement of USE on CAND in an address.  */
4425
4426 static bool
4427 determine_use_iv_cost_address (struct ivopts_data *data,
4428                                struct iv_use *use, struct iv_cand *cand)
4429 {
4430   bitmap depends_on;
4431   bool can_autoinc;
4432   int inv_expr_id = -1;
4433   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4434                                          &can_autoinc, &inv_expr_id);
4435
4436   if (cand->ainc_use == use)
4437     {
4438       if (can_autoinc)
4439         cost.cost -= cand->cost_step;
4440       /* If we generated the candidate solely for exploiting autoincrement
4441          opportunities, and it turns out it can't be used, set the cost to
4442          infinity to make sure we ignore it.  */
4443       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4444         cost = infinite_cost;
4445     }
4446   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4447                    inv_expr_id);
4448
4449   return !infinite_cost_p (cost);
4450 }
4451
4452 /* Computes value of candidate CAND at position AT in iteration NITER, and
4453    stores it to VAL.  */
4454
4455 static void
4456 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4457                aff_tree *val)
4458 {
4459   aff_tree step, delta, nit;
4460   struct iv *iv = cand->iv;
4461   tree type = TREE_TYPE (iv->base);
4462   tree steptype = type;
4463   if (POINTER_TYPE_P (type))
4464     steptype = sizetype;
4465   steptype = unsigned_type_for (type);
4466
4467   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4468   aff_combination_convert (&step, steptype);
4469   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4470   aff_combination_convert (&nit, steptype);
4471   aff_combination_mult (&nit, &step, &delta);
4472   if (stmt_after_increment (loop, cand, at))
4473     aff_combination_add (&delta, &step);
4474
4475   tree_to_aff_combination (iv->base, type, val);
4476   if (!POINTER_TYPE_P (type))
4477     aff_combination_convert (val, steptype);
4478   aff_combination_add (val, &delta);
4479 }
4480
4481 /* Returns period of induction variable iv.  */
4482
4483 static tree
4484 iv_period (struct iv *iv)
4485 {
4486   tree step = iv->step, period, type;
4487   tree pow2div;
4488
4489   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4490
4491   type = unsigned_type_for (TREE_TYPE (step));
4492   /* Period of the iv is lcm (step, type_range)/step -1,
4493      i.e., N*type_range/step - 1. Since type range is power
4494      of two, N == (step >> num_of_ending_zeros_binary (step),
4495      so the final result is
4496
4497        (type_range >> num_of_ending_zeros_binary (step)) - 1
4498
4499   */
4500   pow2div = num_ending_zeros (step);
4501
4502   period = build_low_bits_mask (type,
4503                                 (TYPE_PRECISION (type)
4504                                  - tree_to_uhwi (pow2div)));
4505
4506   return period;
4507 }
4508
4509 /* Returns the comparison operator used when eliminating the iv USE.  */
4510
4511 static enum tree_code
4512 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4513 {
4514   struct loop *loop = data->current_loop;
4515   basic_block ex_bb;
4516   edge exit;
4517
4518   ex_bb = gimple_bb (use->stmt);
4519   exit = EDGE_SUCC (ex_bb, 0);
4520   if (flow_bb_inside_loop_p (loop, exit->dest))
4521     exit = EDGE_SUCC (ex_bb, 1);
4522
4523   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4524 }
4525
4526 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4527    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4528    calculation is performed in non-wrapping type.
4529
4530    TODO: More generally, we could test for the situation that
4531          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4532          This would require knowing the sign of OFFSET.  */
4533
4534 static bool
4535 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4536 {
4537   enum tree_code code;
4538   tree e1, e2;
4539   aff_tree aff_e1, aff_e2, aff_offset;
4540
4541   if (!nowrap_type_p (TREE_TYPE (base)))
4542     return false;
4543
4544   base = expand_simple_operations (base);
4545
4546   if (TREE_CODE (base) == SSA_NAME)
4547     {
4548       gimple stmt = SSA_NAME_DEF_STMT (base);
4549
4550       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4551         return false;
4552
4553       code = gimple_assign_rhs_code (stmt);
4554       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4555         return false;
4556
4557       e1 = gimple_assign_rhs1 (stmt);
4558       e2 = gimple_assign_rhs2 (stmt);
4559     }
4560   else
4561     {
4562       code = TREE_CODE (base);
4563       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4564         return false;
4565       e1 = TREE_OPERAND (base, 0);
4566       e2 = TREE_OPERAND (base, 1);
4567     }
4568
4569   /* Use affine expansion as deeper inspection to prove the equality.  */
4570   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4571                                   &aff_e2, &data->name_expansion_cache);
4572   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4573                                   &aff_offset, &data->name_expansion_cache);
4574   aff_combination_scale (&aff_offset, -1);
4575   switch (code)
4576     {
4577     case PLUS_EXPR:
4578       aff_combination_add (&aff_e2, &aff_offset);
4579       if (aff_combination_zero_p (&aff_e2))
4580         return true;
4581
4582       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4583                                       &aff_e1, &data->name_expansion_cache);
4584       aff_combination_add (&aff_e1, &aff_offset);
4585       return aff_combination_zero_p (&aff_e1);
4586
4587     case POINTER_PLUS_EXPR:
4588       aff_combination_add (&aff_e2, &aff_offset);
4589       return aff_combination_zero_p (&aff_e2);
4590
4591     default:
4592       return false;
4593     }
4594 }
4595
4596 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4597    comparison with CAND.  NITER describes the number of iterations of
4598    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4599
4600    We aim to handle the following situation:
4601
4602    sometype *base, *p;
4603    int a, b, i;
4604
4605    i = a;
4606    p = p_0 = base + a;
4607
4608    do
4609      {
4610        bla (*p);
4611        p++;
4612        i++;
4613      }
4614    while (i < b);
4615
4616    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4617    We aim to optimize this to
4618
4619    p = p_0 = base + a;
4620    do
4621      {
4622        bla (*p);
4623        p++;
4624      }
4625    while (p < p_0 - a + b);
4626
4627    This preserves the correctness, since the pointer arithmetics does not
4628    overflow.  More precisely:
4629
4630    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4631       overflow in computing it or the values of p.
4632    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4633       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4634
4635 static bool
4636 iv_elimination_compare_lt (struct ivopts_data *data,
4637                            struct iv_cand *cand, enum tree_code *comp_p,
4638                            struct tree_niter_desc *niter)
4639 {
4640   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4641   struct aff_tree nit, tmpa, tmpb;
4642   enum tree_code comp;
4643   HOST_WIDE_INT step;
4644
4645   /* We need to know that the candidate induction variable does not overflow.
4646      While more complex analysis may be used to prove this, for now just
4647      check that the variable appears in the original program and that it
4648      is computed in a type that guarantees no overflows.  */
4649   cand_type = TREE_TYPE (cand->iv->base);
4650   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4651     return false;
4652
4653   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4654      the calculation of the BOUND could overflow, making the comparison
4655      invalid.  */
4656   if (!data->loop_single_exit_p)
4657     return false;
4658
4659   /* We need to be able to decide whether candidate is increasing or decreasing
4660      in order to choose the right comparison operator.  */
4661   if (!cst_and_fits_in_hwi (cand->iv->step))
4662     return false;
4663   step = int_cst_value (cand->iv->step);
4664
4665   /* Check that the number of iterations matches the expected pattern:
4666      a + 1 > b ? 0 : b - a - 1.  */
4667   mbz = niter->may_be_zero;
4668   if (TREE_CODE (mbz) == GT_EXPR)
4669     {
4670       /* Handle a + 1 > b.  */
4671       tree op0 = TREE_OPERAND (mbz, 0);
4672       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4673         {
4674           a = TREE_OPERAND (op0, 0);
4675           b = TREE_OPERAND (mbz, 1);
4676         }
4677       else
4678         return false;
4679     }
4680   else if (TREE_CODE (mbz) == LT_EXPR)
4681     {
4682       tree op1 = TREE_OPERAND (mbz, 1);
4683
4684       /* Handle b < a + 1.  */
4685       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4686         {
4687           a = TREE_OPERAND (op1, 0);
4688           b = TREE_OPERAND (mbz, 0);
4689         }
4690       else
4691         return false;
4692     }
4693   else
4694     return false;
4695
4696   /* Expected number of iterations is B - A - 1.  Check that it matches
4697      the actual number, i.e., that B - A - NITER = 1.  */
4698   tree_to_aff_combination (niter->niter, nit_type, &nit);
4699   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4700   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4701   aff_combination_scale (&nit, -1);
4702   aff_combination_scale (&tmpa, -1);
4703   aff_combination_add (&tmpb, &tmpa);
4704   aff_combination_add (&tmpb, &nit);
4705   if (tmpb.n != 0 || tmpb.offset != 1)
4706     return false;
4707
4708   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4709      overflow.  */
4710   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4711                         cand->iv->step,
4712                         fold_convert (TREE_TYPE (cand->iv->step), a));
4713   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4714     return false;
4715
4716   /* Determine the new comparison operator.  */
4717   comp = step < 0 ? GT_EXPR : LT_EXPR;
4718   if (*comp_p == NE_EXPR)
4719     *comp_p = comp;
4720   else if (*comp_p == EQ_EXPR)
4721     *comp_p = invert_tree_comparison (comp, false);
4722   else
4723     gcc_unreachable ();
4724
4725   return true;
4726 }
4727
4728 /* Check whether it is possible to express the condition in USE by comparison
4729    of candidate CAND.  If so, store the value compared with to BOUND, and the
4730    comparison operator to COMP.  */
4731
4732 static bool
4733 may_eliminate_iv (struct ivopts_data *data,
4734                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4735                   enum tree_code *comp)
4736 {
4737   basic_block ex_bb;
4738   edge exit;
4739   tree period;
4740   struct loop *loop = data->current_loop;
4741   aff_tree bnd;
4742   struct tree_niter_desc *desc = NULL;
4743
4744   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4745     return false;
4746
4747   /* For now works only for exits that dominate the loop latch.
4748      TODO: extend to other conditions inside loop body.  */
4749   ex_bb = gimple_bb (use->stmt);
4750   if (use->stmt != last_stmt (ex_bb)
4751       || gimple_code (use->stmt) != GIMPLE_COND
4752       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4753     return false;
4754
4755   exit = EDGE_SUCC (ex_bb, 0);
4756   if (flow_bb_inside_loop_p (loop, exit->dest))
4757     exit = EDGE_SUCC (ex_bb, 1);
4758   if (flow_bb_inside_loop_p (loop, exit->dest))
4759     return false;
4760
4761   desc = niter_for_exit (data, exit);
4762   if (!desc)
4763     return false;
4764
4765   /* Determine whether we can use the variable to test the exit condition.
4766      This is the case iff the period of the induction variable is greater
4767      than the number of iterations for which the exit condition is true.  */
4768   period = iv_period (cand->iv);
4769
4770   /* If the number of iterations is constant, compare against it directly.  */
4771   if (TREE_CODE (desc->niter) == INTEGER_CST)
4772     {
4773       /* See cand_value_at.  */
4774       if (stmt_after_increment (loop, cand, use->stmt))
4775         {
4776           if (!tree_int_cst_lt (desc->niter, period))
4777             return false;
4778         }
4779       else
4780         {
4781           if (tree_int_cst_lt (period, desc->niter))
4782             return false;
4783         }
4784     }
4785
4786   /* If not, and if this is the only possible exit of the loop, see whether
4787      we can get a conservative estimate on the number of iterations of the
4788      entire loop and compare against that instead.  */
4789   else
4790     {
4791       widest_int period_value, max_niter;
4792
4793       max_niter = desc->max;
4794       if (stmt_after_increment (loop, cand, use->stmt))
4795         max_niter += 1;
4796       period_value = wi::to_widest (period);
4797       if (wi::gtu_p (max_niter, period_value))
4798         {
4799           /* See if we can take advantage of inferred loop bound information.  */
4800           if (data->loop_single_exit_p)
4801             {
4802               if (!max_loop_iterations (loop, &max_niter))
4803                 return false;
4804               /* The loop bound is already adjusted by adding 1.  */
4805               if (wi::gtu_p (max_niter, period_value))
4806                 return false;
4807             }
4808           else
4809             return false;
4810         }
4811     }
4812
4813   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4814
4815   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4816                          aff_combination_to_tree (&bnd));
4817   *comp = iv_elimination_compare (data, use);
4818
4819   /* It is unlikely that computing the number of iterations using division
4820      would be more profitable than keeping the original induction variable.  */
4821   if (expression_expensive_p (*bound))
4822     return false;
4823
4824   /* Sometimes, it is possible to handle the situation that the number of
4825      iterations may be zero unless additional assumtions by using <
4826      instead of != in the exit condition.
4827
4828      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4829            base the exit condition on it.  However, that is often too
4830            expensive.  */
4831   if (!integer_zerop (desc->may_be_zero))
4832     return iv_elimination_compare_lt (data, cand, comp, desc);
4833
4834   return true;
4835 }
4836
4837  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4838     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4839
4840 static int
4841 parm_decl_cost (struct ivopts_data *data, tree bound)
4842 {
4843   tree sbound = bound;
4844   STRIP_NOPS (sbound);
4845
4846   if (TREE_CODE (sbound) == SSA_NAME
4847       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4848       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4849       && data->body_includes_call)
4850     return COSTS_N_INSNS (1);
4851
4852   return 0;
4853 }
4854
4855 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4856
4857 static bool
4858 determine_use_iv_cost_condition (struct ivopts_data *data,
4859                                  struct iv_use *use, struct iv_cand *cand)
4860 {
4861   tree bound = NULL_TREE;
4862   struct iv *cmp_iv;
4863   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4864   comp_cost elim_cost, express_cost, cost, bound_cost;
4865   bool ok;
4866   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4867   tree *control_var, *bound_cst;
4868   enum tree_code comp = ERROR_MARK;
4869
4870   /* Only consider real candidates.  */
4871   if (!cand->iv)
4872     {
4873       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4874                        ERROR_MARK, -1);
4875       return false;
4876     }
4877
4878   /* Try iv elimination.  */
4879   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4880     {
4881       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4882       if (elim_cost.cost == 0)
4883         elim_cost.cost = parm_decl_cost (data, bound);
4884       else if (TREE_CODE (bound) == INTEGER_CST)
4885         elim_cost.cost = 0;
4886       /* If we replace a loop condition 'i < n' with 'p < base + n',
4887          depends_on_elim will have 'base' and 'n' set, which implies
4888          that both 'base' and 'n' will be live during the loop.  More likely,
4889          'base + n' will be loop invariant, resulting in only one live value
4890          during the loop.  So in that case we clear depends_on_elim and set
4891         elim_inv_expr_id instead.  */
4892       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4893         {
4894           elim_inv_expr_id = get_expr_id (data, bound);
4895           bitmap_clear (depends_on_elim);
4896         }
4897       /* The bound is a loop invariant, so it will be only computed
4898          once.  */
4899       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4900     }
4901   else
4902     elim_cost = infinite_cost;
4903
4904   /* Try expressing the original giv.  If it is compared with an invariant,
4905      note that we cannot get rid of it.  */
4906   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4907                               NULL, &cmp_iv);
4908   gcc_assert (ok);
4909
4910   /* When the condition is a comparison of the candidate IV against
4911      zero, prefer this IV.
4912
4913      TODO: The constant that we're subtracting from the cost should
4914      be target-dependent.  This information should be added to the
4915      target costs for each backend.  */
4916   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4917       && integer_zerop (*bound_cst)
4918       && (operand_equal_p (*control_var, cand->var_after, 0)
4919           || operand_equal_p (*control_var, cand->var_before, 0)))
4920     elim_cost.cost -= 1;
4921
4922   express_cost = get_computation_cost (data, use, cand, false,
4923                                        &depends_on_express, NULL,
4924                                        &express_inv_expr_id);
4925   fd_ivopts_data = data;
4926   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4927
4928   /* Count the cost of the original bound as well.  */
4929   bound_cost = force_var_cost (data, *bound_cst, NULL);
4930   if (bound_cost.cost == 0)
4931     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4932   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4933     bound_cost.cost = 0;
4934   express_cost.cost += bound_cost.cost;
4935
4936   /* Choose the better approach, preferring the eliminated IV. */
4937   if (compare_costs (elim_cost, express_cost) <= 0)
4938     {
4939       cost = elim_cost;
4940       depends_on = depends_on_elim;
4941       depends_on_elim = NULL;
4942       inv_expr_id = elim_inv_expr_id;
4943     }
4944   else
4945     {
4946       cost = express_cost;
4947       depends_on = depends_on_express;
4948       depends_on_express = NULL;
4949       bound = NULL_TREE;
4950       comp = ERROR_MARK;
4951       inv_expr_id = express_inv_expr_id;
4952     }
4953
4954   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4955
4956   if (depends_on_elim)
4957     BITMAP_FREE (depends_on_elim);
4958   if (depends_on_express)
4959     BITMAP_FREE (depends_on_express);
4960
4961   return !infinite_cost_p (cost);
4962 }
4963
4964 /* Determines cost of basing replacement of USE on CAND.  Returns false
4965    if USE cannot be based on CAND.  */
4966
4967 static bool
4968 determine_use_iv_cost (struct ivopts_data *data,
4969                        struct iv_use *use, struct iv_cand *cand)
4970 {
4971   switch (use->type)
4972     {
4973     case USE_NONLINEAR_EXPR:
4974       return determine_use_iv_cost_generic (data, use, cand);
4975
4976     case USE_ADDRESS:
4977       return determine_use_iv_cost_address (data, use, cand);
4978
4979     case USE_COMPARE:
4980       return determine_use_iv_cost_condition (data, use, cand);
4981
4982     default:
4983       gcc_unreachable ();
4984     }
4985 }
4986
4987 /* Return true if get_computation_cost indicates that autoincrement is
4988    a possibility for the pair of USE and CAND, false otherwise.  */
4989
4990 static bool
4991 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4992                            struct iv_cand *cand)
4993 {
4994   bitmap depends_on;
4995   bool can_autoinc;
4996   comp_cost cost;
4997
4998   if (use->type != USE_ADDRESS)
4999     return false;
5000
5001   cost = get_computation_cost (data, use, cand, true, &depends_on,
5002                                &can_autoinc, NULL);
5003
5004   BITMAP_FREE (depends_on);
5005
5006   return !infinite_cost_p (cost) && can_autoinc;
5007 }
5008
5009 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5010    use that allows autoincrement, and set their AINC_USE if possible.  */
5011
5012 static void
5013 set_autoinc_for_original_candidates (struct ivopts_data *data)
5014 {
5015   unsigned i, j;
5016
5017   for (i = 0; i < n_iv_cands (data); i++)
5018     {
5019       struct iv_cand *cand = iv_cand (data, i);
5020       struct iv_use *closest_before = NULL;
5021       struct iv_use *closest_after = NULL;
5022       if (cand->pos != IP_ORIGINAL)
5023         continue;
5024
5025       for (j = 0; j < n_iv_uses (data); j++)
5026         {
5027           struct iv_use *use = iv_use (data, j);
5028           unsigned uid = gimple_uid (use->stmt);
5029
5030           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5031             continue;
5032
5033           if (uid < gimple_uid (cand->incremented_at)
5034               && (closest_before == NULL
5035                   || uid > gimple_uid (closest_before->stmt)))
5036             closest_before = use;
5037
5038           if (uid > gimple_uid (cand->incremented_at)
5039               && (closest_after == NULL
5040                   || uid < gimple_uid (closest_after->stmt)))
5041             closest_after = use;
5042         }
5043
5044       if (closest_before != NULL
5045           && autoinc_possible_for_pair (data, closest_before, cand))
5046         cand->ainc_use = closest_before;
5047       else if (closest_after != NULL
5048                && autoinc_possible_for_pair (data, closest_after, cand))
5049         cand->ainc_use = closest_after;
5050     }
5051 }
5052
5053 /* Finds the candidates for the induction variables.  */
5054
5055 static void
5056 find_iv_candidates (struct ivopts_data *data)
5057 {
5058   /* Add commonly used ivs.  */
5059   add_standard_iv_candidates (data);
5060
5061   /* Add old induction variables.  */
5062   add_old_ivs_candidates (data);
5063
5064   /* Add induction variables derived from uses.  */
5065   add_derived_ivs_candidates (data);
5066
5067   set_autoinc_for_original_candidates (data);
5068
5069   /* Record the important candidates.  */
5070   record_important_candidates (data);
5071 }
5072
5073 /* Determines costs of basing the use of the iv on an iv candidate.  */
5074
5075 static void
5076 determine_use_iv_costs (struct ivopts_data *data)
5077 {
5078   unsigned i, j;
5079   struct iv_use *use;
5080   struct iv_cand *cand;
5081   bitmap to_clear = BITMAP_ALLOC (NULL);
5082
5083   alloc_use_cost_map (data);
5084
5085   for (i = 0; i < n_iv_uses (data); i++)
5086     {
5087       use = iv_use (data, i);
5088
5089       if (data->consider_all_candidates)
5090         {
5091           for (j = 0; j < n_iv_cands (data); j++)
5092             {
5093               cand = iv_cand (data, j);
5094               determine_use_iv_cost (data, use, cand);
5095             }
5096         }
5097       else
5098         {
5099           bitmap_iterator bi;
5100
5101           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5102             {
5103               cand = iv_cand (data, j);
5104               if (!determine_use_iv_cost (data, use, cand))
5105                 bitmap_set_bit (to_clear, j);
5106             }
5107
5108           /* Remove the candidates for that the cost is infinite from
5109              the list of related candidates.  */
5110           bitmap_and_compl_into (use->related_cands, to_clear);
5111           bitmap_clear (to_clear);
5112         }
5113     }
5114
5115   BITMAP_FREE (to_clear);
5116
5117   if (dump_file && (dump_flags & TDF_DETAILS))
5118     {
5119       fprintf (dump_file, "Use-candidate costs:\n");
5120
5121       for (i = 0; i < n_iv_uses (data); i++)
5122         {
5123           use = iv_use (data, i);
5124
5125           fprintf (dump_file, "Use %d:\n", i);
5126           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5127           for (j = 0; j < use->n_map_members; j++)
5128             {
5129               if (!use->cost_map[j].cand
5130                   || infinite_cost_p (use->cost_map[j].cost))
5131                 continue;
5132
5133               fprintf (dump_file, "  %d\t%d\t%d\t",
5134                        use->cost_map[j].cand->id,
5135                        use->cost_map[j].cost.cost,
5136                        use->cost_map[j].cost.complexity);
5137               if (use->cost_map[j].depends_on)
5138                 bitmap_print (dump_file,
5139                               use->cost_map[j].depends_on, "","");
5140               if (use->cost_map[j].inv_expr_id != -1)
5141                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5142               fprintf (dump_file, "\n");
5143             }
5144
5145           fprintf (dump_file, "\n");
5146         }
5147       fprintf (dump_file, "\n");
5148     }
5149 }
5150
5151 /* Determines cost of the candidate CAND.  */
5152
5153 static void
5154 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5155 {
5156   comp_cost cost_base;
5157   unsigned cost, cost_step;
5158   tree base;
5159
5160   if (!cand->iv)
5161     {
5162       cand->cost = 0;
5163       return;
5164     }
5165
5166   /* There are two costs associated with the candidate -- its increment
5167      and its initialization.  The second is almost negligible for any loop
5168      that rolls enough, so we take it just very little into account.  */
5169
5170   base = cand->iv->base;
5171   cost_base = force_var_cost (data, base, NULL);
5172   /* It will be exceptional that the iv register happens to be initialized with
5173      the proper value at no cost.  In general, there will at least be a regcopy
5174      or a const set.  */
5175   if (cost_base.cost == 0)
5176     cost_base.cost = COSTS_N_INSNS (1);
5177   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5178
5179   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5180
5181   /* Prefer the original ivs unless we may gain something by replacing it.
5182      The reason is to make debugging simpler; so this is not relevant for
5183      artificial ivs created by other optimization passes.  */
5184   if (cand->pos != IP_ORIGINAL
5185       || !SSA_NAME_VAR (cand->var_before)
5186       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5187     cost++;
5188
5189   /* Prefer not to insert statements into latch unless there are some
5190      already (so that we do not create unnecessary jumps).  */
5191   if (cand->pos == IP_END
5192       && empty_block_p (ip_end_pos (data->current_loop)))
5193     cost++;
5194
5195   cand->cost = cost;
5196   cand->cost_step = cost_step;
5197 }
5198
5199 /* Determines costs of computation of the candidates.  */
5200
5201 static void
5202 determine_iv_costs (struct ivopts_data *data)
5203 {
5204   unsigned i;
5205
5206   if (dump_file && (dump_flags & TDF_DETAILS))
5207     {
5208       fprintf (dump_file, "Candidate costs:\n");
5209       fprintf (dump_file, "  cand\tcost\n");
5210     }
5211
5212   for (i = 0; i < n_iv_cands (data); i++)
5213     {
5214       struct iv_cand *cand = iv_cand (data, i);
5215
5216       determine_iv_cost (data, cand);
5217
5218       if (dump_file && (dump_flags & TDF_DETAILS))
5219         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5220     }
5221
5222   if (dump_file && (dump_flags & TDF_DETAILS))
5223     fprintf (dump_file, "\n");
5224 }
5225
5226 /* Calculates cost for having SIZE induction variables.  */
5227
5228 static unsigned
5229 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5230 {
5231   /* We add size to the cost, so that we prefer eliminating ivs
5232      if possible.  */
5233   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5234                                             data->body_includes_call);
5235 }
5236
5237 /* For each size of the induction variable set determine the penalty.  */
5238
5239 static void
5240 determine_set_costs (struct ivopts_data *data)
5241 {
5242   unsigned j, n;
5243   gphi *phi;
5244   gphi_iterator psi;
5245   tree op;
5246   struct loop *loop = data->current_loop;
5247   bitmap_iterator bi;
5248
5249   if (dump_file && (dump_flags & TDF_DETAILS))
5250     {
5251       fprintf (dump_file, "Global costs:\n");
5252       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5253       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5254       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5255       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5256     }
5257
5258   n = 0;
5259   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5260     {
5261       phi = psi.phi ();
5262       op = PHI_RESULT (phi);
5263
5264       if (virtual_operand_p (op))
5265         continue;
5266
5267       if (get_iv (data, op))
5268         continue;
5269
5270       n++;
5271     }
5272
5273   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5274     {
5275       struct version_info *info = ver_info (data, j);
5276
5277       if (info->inv_id && info->has_nonlin_use)
5278         n++;
5279     }
5280
5281   data->regs_used = n;
5282   if (dump_file && (dump_flags & TDF_DETAILS))
5283     fprintf (dump_file, "  regs_used %d\n", n);
5284
5285   if (dump_file && (dump_flags & TDF_DETAILS))
5286     {
5287       fprintf (dump_file, "  cost for size:\n");
5288       fprintf (dump_file, "  ivs\tcost\n");
5289       for (j = 0; j <= 2 * target_avail_regs; j++)
5290         fprintf (dump_file, "  %d\t%d\n", j,
5291                  ivopts_global_cost_for_size (data, j));
5292       fprintf (dump_file, "\n");
5293     }
5294 }
5295
5296 /* Returns true if A is a cheaper cost pair than B.  */
5297
5298 static bool
5299 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5300 {
5301   int cmp;
5302
5303   if (!a)
5304     return false;
5305
5306   if (!b)
5307     return true;
5308
5309   cmp = compare_costs (a->cost, b->cost);
5310   if (cmp < 0)
5311     return true;
5312
5313   if (cmp > 0)
5314     return false;
5315
5316   /* In case the costs are the same, prefer the cheaper candidate.  */
5317   if (a->cand->cost < b->cand->cost)
5318     return true;
5319
5320   return false;
5321 }
5322
5323
5324 /* Returns candidate by that USE is expressed in IVS.  */
5325
5326 static struct cost_pair *
5327 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5328 {
5329   return ivs->cand_for_use[use->id];
5330 }
5331
5332 /* Computes the cost field of IVS structure.  */
5333
5334 static void
5335 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5336 {
5337   comp_cost cost = ivs->cand_use_cost;
5338
5339   cost.cost += ivs->cand_cost;
5340
5341   cost.cost += ivopts_global_cost_for_size (data,
5342                                             ivs->n_regs + ivs->num_used_inv_expr);
5343
5344   ivs->cost = cost;
5345 }
5346
5347 /* Remove invariants in set INVS to set IVS.  */
5348
5349 static void
5350 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5351 {
5352   bitmap_iterator bi;
5353   unsigned iid;
5354
5355   if (!invs)
5356     return;
5357
5358   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5359     {
5360       ivs->n_invariant_uses[iid]--;
5361       if (ivs->n_invariant_uses[iid] == 0)
5362         ivs->n_regs--;
5363     }
5364 }
5365
5366 /* Set USE not to be expressed by any candidate in IVS.  */
5367
5368 static void
5369 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5370                  struct iv_use *use)
5371 {
5372   unsigned uid = use->id, cid;
5373   struct cost_pair *cp;
5374
5375   cp = ivs->cand_for_use[uid];
5376   if (!cp)
5377     return;
5378   cid = cp->cand->id;
5379
5380   ivs->bad_uses++;
5381   ivs->cand_for_use[uid] = NULL;
5382   ivs->n_cand_uses[cid]--;
5383
5384   if (ivs->n_cand_uses[cid] == 0)
5385     {
5386       bitmap_clear_bit (ivs->cands, cid);
5387       /* Do not count the pseudocandidates.  */
5388       if (cp->cand->iv)
5389         ivs->n_regs--;
5390       ivs->n_cands--;
5391       ivs->cand_cost -= cp->cand->cost;
5392
5393       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5394     }
5395
5396   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5397
5398   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5399
5400   if (cp->inv_expr_id != -1)
5401     {
5402       ivs->used_inv_expr[cp->inv_expr_id]--;
5403       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5404         ivs->num_used_inv_expr--;
5405     }
5406   iv_ca_recount_cost (data, ivs);
5407 }
5408
5409 /* Add invariants in set INVS to set IVS.  */
5410
5411 static void
5412 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5413 {
5414   bitmap_iterator bi;
5415   unsigned iid;
5416
5417   if (!invs)
5418     return;
5419
5420   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5421     {
5422       ivs->n_invariant_uses[iid]++;
5423       if (ivs->n_invariant_uses[iid] == 1)
5424         ivs->n_regs++;
5425     }
5426 }
5427
5428 /* Set cost pair for USE in set IVS to CP.  */
5429
5430 static void
5431 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5432               struct iv_use *use, struct cost_pair *cp)
5433 {
5434   unsigned uid = use->id, cid;
5435
5436   if (ivs->cand_for_use[uid] == cp)
5437     return;
5438
5439   if (ivs->cand_for_use[uid])
5440     iv_ca_set_no_cp (data, ivs, use);
5441
5442   if (cp)
5443     {
5444       cid = cp->cand->id;
5445
5446       ivs->bad_uses--;
5447       ivs->cand_for_use[uid] = cp;
5448       ivs->n_cand_uses[cid]++;
5449       if (ivs->n_cand_uses[cid] == 1)
5450         {
5451           bitmap_set_bit (ivs->cands, cid);
5452           /* Do not count the pseudocandidates.  */
5453           if (cp->cand->iv)
5454             ivs->n_regs++;
5455           ivs->n_cands++;
5456           ivs->cand_cost += cp->cand->cost;
5457
5458           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5459         }
5460
5461       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5462       iv_ca_set_add_invariants (ivs, cp->depends_on);
5463
5464       if (cp->inv_expr_id != -1)
5465         {
5466           ivs->used_inv_expr[cp->inv_expr_id]++;
5467           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5468             ivs->num_used_inv_expr++;
5469         }
5470       iv_ca_recount_cost (data, ivs);
5471     }
5472 }
5473
5474 /* Extend set IVS by expressing USE by some of the candidates in it
5475    if possible.  Consider all important candidates if candidates in
5476    set IVS don't give any result.  */
5477
5478 static void
5479 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5480                struct iv_use *use)
5481 {
5482   struct cost_pair *best_cp = NULL, *cp;
5483   bitmap_iterator bi;
5484   unsigned i;
5485   struct iv_cand *cand;
5486
5487   gcc_assert (ivs->upto >= use->id);
5488   ivs->upto++;
5489   ivs->bad_uses++;
5490
5491   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5492     {
5493       cand = iv_cand (data, i);
5494       cp = get_use_iv_cost (data, use, cand);
5495       if (cheaper_cost_pair (cp, best_cp))
5496         best_cp = cp;
5497     }
5498
5499   if (best_cp == NULL)
5500     {
5501       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5502         {
5503           cand = iv_cand (data, i);
5504           cp = get_use_iv_cost (data, use, cand);
5505           if (cheaper_cost_pair (cp, best_cp))
5506             best_cp = cp;
5507         }
5508     }
5509
5510   iv_ca_set_cp (data, ivs, use, best_cp);
5511 }
5512
5513 /* Get cost for assignment IVS.  */
5514
5515 static comp_cost
5516 iv_ca_cost (struct iv_ca *ivs)
5517 {
5518   /* This was a conditional expression but it triggered a bug in
5519      Sun C 5.5.  */
5520   if (ivs->bad_uses)
5521     return infinite_cost;
5522   else
5523     return ivs->cost;
5524 }
5525
5526 /* Returns true if all dependences of CP are among invariants in IVS.  */
5527
5528 static bool
5529 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5530 {
5531   unsigned i;
5532   bitmap_iterator bi;
5533
5534   if (!cp->depends_on)
5535     return true;
5536
5537   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5538     {
5539       if (ivs->n_invariant_uses[i] == 0)
5540         return false;
5541     }
5542
5543   return true;
5544 }
5545
5546 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5547    it before NEXT_CHANGE.  */
5548
5549 static struct iv_ca_delta *
5550 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5551                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5552 {
5553   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5554
5555   change->use = use;
5556   change->old_cp = old_cp;
5557   change->new_cp = new_cp;
5558   change->next_change = next_change;
5559
5560   return change;
5561 }
5562
5563 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5564    are rewritten.  */
5565
5566 static struct iv_ca_delta *
5567 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5568 {
5569   struct iv_ca_delta *last;
5570
5571   if (!l2)
5572     return l1;
5573
5574   if (!l1)
5575     return l2;
5576
5577   for (last = l1; last->next_change; last = last->next_change)
5578     continue;
5579   last->next_change = l2;
5580
5581   return l1;
5582 }
5583
5584 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5585
5586 static struct iv_ca_delta *
5587 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5588 {
5589   struct iv_ca_delta *act, *next, *prev = NULL;
5590   struct cost_pair *tmp;
5591
5592   for (act = delta; act; act = next)
5593     {
5594       next = act->next_change;
5595       act->next_change = prev;
5596       prev = act;
5597
5598       tmp = act->old_cp;
5599       act->old_cp = act->new_cp;
5600       act->new_cp = tmp;
5601     }
5602
5603   return prev;
5604 }
5605
5606 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5607    reverted instead.  */
5608
5609 static void
5610 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5611                     struct iv_ca_delta *delta, bool forward)
5612 {
5613   struct cost_pair *from, *to;
5614   struct iv_ca_delta *act;
5615
5616   if (!forward)
5617     delta = iv_ca_delta_reverse (delta);
5618
5619   for (act = delta; act; act = act->next_change)
5620     {
5621       from = act->old_cp;
5622       to = act->new_cp;
5623       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5624       iv_ca_set_cp (data, ivs, act->use, to);
5625     }
5626
5627   if (!forward)
5628     iv_ca_delta_reverse (delta);
5629 }
5630
5631 /* Returns true if CAND is used in IVS.  */
5632
5633 static bool
5634 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5635 {
5636   return ivs->n_cand_uses[cand->id] > 0;
5637 }
5638
5639 /* Returns number of induction variable candidates in the set IVS.  */
5640
5641 static unsigned
5642 iv_ca_n_cands (struct iv_ca *ivs)
5643 {
5644   return ivs->n_cands;
5645 }
5646
5647 /* Free the list of changes DELTA.  */
5648
5649 static void
5650 iv_ca_delta_free (struct iv_ca_delta **delta)
5651 {
5652   struct iv_ca_delta *act, *next;
5653
5654   for (act = *delta; act; act = next)
5655     {
5656       next = act->next_change;
5657       free (act);
5658     }
5659
5660   *delta = NULL;
5661 }
5662
5663 /* Allocates new iv candidates assignment.  */
5664
5665 static struct iv_ca *
5666 iv_ca_new (struct ivopts_data *data)
5667 {
5668   struct iv_ca *nw = XNEW (struct iv_ca);
5669
5670   nw->upto = 0;
5671   nw->bad_uses = 0;
5672   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5673   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5674   nw->cands = BITMAP_ALLOC (NULL);
5675   nw->n_cands = 0;
5676   nw->n_regs = 0;
5677   nw->cand_use_cost = no_cost;
5678   nw->cand_cost = 0;
5679   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5680   nw->cost = no_cost;
5681   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5682   nw->num_used_inv_expr = 0;
5683
5684   return nw;
5685 }
5686
5687 /* Free memory occupied by the set IVS.  */
5688
5689 static void
5690 iv_ca_free (struct iv_ca **ivs)
5691 {
5692   free ((*ivs)->cand_for_use);
5693   free ((*ivs)->n_cand_uses);
5694   BITMAP_FREE ((*ivs)->cands);
5695   free ((*ivs)->n_invariant_uses);
5696   free ((*ivs)->used_inv_expr);
5697   free (*ivs);
5698   *ivs = NULL;
5699 }
5700
5701 /* Dumps IVS to FILE.  */
5702
5703 static void
5704 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5705 {
5706   const char *pref = "  invariants ";
5707   unsigned i;
5708   comp_cost cost = iv_ca_cost (ivs);
5709
5710   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5711   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5712            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5713   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5714
5715    for (i = 0; i < ivs->upto; i++)
5716     {
5717       struct iv_use *use = iv_use (data, i);
5718       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5719       if (cp)
5720         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5721                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5722       else
5723         fprintf (file, "   use:%d --> ??\n", use->id);
5724     }
5725
5726   for (i = 1; i <= data->max_inv_id; i++)
5727     if (ivs->n_invariant_uses[i])
5728       {
5729         fprintf (file, "%s%d", pref, i);
5730         pref = ", ";
5731       }
5732   fprintf (file, "\n\n");
5733 }
5734
5735 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5736    new set, and store differences in DELTA.  Number of induction variables
5737    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5738    the function will try to find a solution with mimimal iv candidates.  */
5739
5740 static comp_cost
5741 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5742               struct iv_cand *cand, struct iv_ca_delta **delta,
5743               unsigned *n_ivs, bool min_ncand)
5744 {
5745   unsigned i;
5746   comp_cost cost;
5747   struct iv_use *use;
5748   struct cost_pair *old_cp, *new_cp;
5749
5750   *delta = NULL;
5751   for (i = 0; i < ivs->upto; i++)
5752     {
5753       use = iv_use (data, i);
5754       old_cp = iv_ca_cand_for_use (ivs, use);
5755
5756       if (old_cp
5757           && old_cp->cand == cand)
5758         continue;
5759
5760       new_cp = get_use_iv_cost (data, use, cand);
5761       if (!new_cp)
5762         continue;
5763
5764       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5765         continue;
5766
5767       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5768         continue;
5769
5770       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5771     }
5772
5773   iv_ca_delta_commit (data, ivs, *delta, true);
5774   cost = iv_ca_cost (ivs);
5775   if (n_ivs)
5776     *n_ivs = iv_ca_n_cands (ivs);
5777   iv_ca_delta_commit (data, ivs, *delta, false);
5778
5779   return cost;
5780 }
5781
5782 /* Try narrowing set IVS by removing CAND.  Return the cost of
5783    the new set and store the differences in DELTA.  START is
5784    the candidate with which we start narrowing.  */
5785
5786 static comp_cost
5787 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5788               struct iv_cand *cand, struct iv_cand *start,
5789               struct iv_ca_delta **delta)
5790 {
5791   unsigned i, ci;
5792   struct iv_use *use;
5793   struct cost_pair *old_cp, *new_cp, *cp;
5794   bitmap_iterator bi;
5795   struct iv_cand *cnd;
5796   comp_cost cost, best_cost, acost;
5797
5798   *delta = NULL;
5799   for (i = 0; i < n_iv_uses (data); i++)
5800     {
5801       use = iv_use (data, i);
5802
5803       old_cp = iv_ca_cand_for_use (ivs, use);
5804       if (old_cp->cand != cand)
5805         continue;
5806
5807       best_cost = iv_ca_cost (ivs);
5808       /* Start narrowing with START.  */
5809       new_cp = get_use_iv_cost (data, use, start);
5810
5811       if (data->consider_all_candidates)
5812         {
5813           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5814             {
5815               if (ci == cand->id || (start && ci == start->id))
5816                 continue;
5817
5818               cnd = iv_cand (data, ci);
5819
5820               cp = get_use_iv_cost (data, use, cnd);
5821               if (!cp)
5822                 continue;
5823
5824               iv_ca_set_cp (data, ivs, use, cp);
5825               acost = iv_ca_cost (ivs);
5826
5827               if (compare_costs (acost, best_cost) < 0)
5828                 {
5829                   best_cost = acost;
5830                   new_cp = cp;
5831                 }
5832             }
5833         }
5834       else
5835         {
5836           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5837             {
5838               if (ci == cand->id || (start && ci == start->id))
5839                 continue;
5840
5841               cnd = iv_cand (data, ci);
5842
5843               cp = get_use_iv_cost (data, use, cnd);
5844               if (!cp)
5845                 continue;
5846
5847               iv_ca_set_cp (data, ivs, use, cp);
5848               acost = iv_ca_cost (ivs);
5849
5850               if (compare_costs (acost, best_cost) < 0)
5851                 {
5852                   best_cost = acost;
5853                   new_cp = cp;
5854                 }
5855             }
5856         }
5857       /* Restore to old cp for use.  */
5858       iv_ca_set_cp (data, ivs, use, old_cp);
5859
5860       if (!new_cp)
5861         {
5862           iv_ca_delta_free (delta);
5863           return infinite_cost;
5864         }
5865
5866       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5867     }
5868
5869   iv_ca_delta_commit (data, ivs, *delta, true);
5870   cost = iv_ca_cost (ivs);
5871   iv_ca_delta_commit (data, ivs, *delta, false);
5872
5873   return cost;
5874 }
5875
5876 /* Try optimizing the set of candidates IVS by removing candidates different
5877    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5878    differences in DELTA.  */
5879
5880 static comp_cost
5881 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5882              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5883 {
5884   bitmap_iterator bi;
5885   struct iv_ca_delta *act_delta, *best_delta;
5886   unsigned i;
5887   comp_cost best_cost, acost;
5888   struct iv_cand *cand;
5889
5890   best_delta = NULL;
5891   best_cost = iv_ca_cost (ivs);
5892
5893   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5894     {
5895       cand = iv_cand (data, i);
5896
5897       if (cand == except_cand)
5898         continue;
5899
5900       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5901
5902       if (compare_costs (acost, best_cost) < 0)
5903         {
5904           best_cost = acost;
5905           iv_ca_delta_free (&best_delta);
5906           best_delta = act_delta;
5907         }
5908       else
5909         iv_ca_delta_free (&act_delta);
5910     }
5911
5912   if (!best_delta)
5913     {
5914       *delta = NULL;
5915       return best_cost;
5916     }
5917
5918   /* Recurse to possibly remove other unnecessary ivs.  */
5919   iv_ca_delta_commit (data, ivs, best_delta, true);
5920   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5921   iv_ca_delta_commit (data, ivs, best_delta, false);
5922   *delta = iv_ca_delta_join (best_delta, *delta);
5923   return best_cost;
5924 }
5925
5926 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5927    cheaper local cost for USE than BEST_CP.  Return pointer to
5928    the corresponding cost_pair, otherwise just return BEST_CP.  */
5929
5930 static struct cost_pair*
5931 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5932                         unsigned int cand_idx, struct iv_cand *old_cand,
5933                         struct cost_pair *best_cp)
5934 {
5935   struct iv_cand *cand;
5936   struct cost_pair *cp;
5937
5938   gcc_assert (old_cand != NULL && best_cp != NULL);
5939   if (cand_idx == old_cand->id)
5940     return best_cp;
5941
5942   cand = iv_cand (data, cand_idx);
5943   cp = get_use_iv_cost (data, use, cand);
5944   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5945     return cp;
5946
5947   return best_cp;
5948 }
5949
5950 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5951    which are used by more than one iv uses.  For each of those candidates,
5952    this function tries to represent iv uses under that candidate using
5953    other ones with lower local cost, then tries to prune the new set.
5954    If the new set has lower cost, It returns the new cost after recording
5955    candidate replacement in list DELTA.  */
5956
5957 static comp_cost
5958 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5959                struct iv_ca_delta **delta)
5960 {
5961   bitmap_iterator bi, bj;
5962   unsigned int i, j, k;
5963   struct iv_use *use;
5964   struct iv_cand *cand;
5965   comp_cost orig_cost, acost;
5966   struct iv_ca_delta *act_delta, *tmp_delta;
5967   struct cost_pair *old_cp, *best_cp = NULL;
5968
5969   *delta = NULL;
5970   orig_cost = iv_ca_cost (ivs);
5971
5972   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5973     {
5974       if (ivs->n_cand_uses[i] == 1
5975           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5976         continue;
5977
5978       cand = iv_cand (data, i);
5979
5980       act_delta = NULL;
5981       /*  Represent uses under current candidate using other ones with
5982           lower local cost.  */
5983       for (j = 0; j < ivs->upto; j++)
5984         {
5985           use = iv_use (data, j);
5986           old_cp = iv_ca_cand_for_use (ivs, use);
5987
5988           if (old_cp->cand != cand)
5989             continue;
5990
5991           best_cp = old_cp;
5992           if (data->consider_all_candidates)
5993             for (k = 0; k < n_iv_cands (data); k++)
5994               best_cp = cheaper_cost_with_cand (data, use, k,
5995                                                 old_cp->cand, best_cp);
5996           else
5997             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5998               best_cp = cheaper_cost_with_cand (data, use, k,
5999                                                 old_cp->cand, best_cp);
6000
6001           if (best_cp == old_cp)
6002             continue;
6003
6004           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6005         }
6006       /* No need for further prune.  */
6007       if (!act_delta)
6008         continue;
6009
6010       /* Prune the new candidate set.  */
6011       iv_ca_delta_commit (data, ivs, act_delta, true);
6012       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6013       iv_ca_delta_commit (data, ivs, act_delta, false);
6014       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6015
6016       if (compare_costs (acost, orig_cost) < 0)
6017         {
6018           *delta = act_delta;
6019           return acost;
6020         }
6021       else
6022         iv_ca_delta_free (&act_delta);
6023     }
6024
6025   return orig_cost;
6026 }
6027
6028 /* Tries to extend the sets IVS in the best possible way in order
6029    to express the USE.  If ORIGINALP is true, prefer candidates from
6030    the original set of IVs, otherwise favor important candidates not
6031    based on any memory object.  */
6032
6033 static bool
6034 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6035                   struct iv_use *use, bool originalp)
6036 {
6037   comp_cost best_cost, act_cost;
6038   unsigned i;
6039   bitmap_iterator bi;
6040   struct iv_cand *cand;
6041   struct iv_ca_delta *best_delta = NULL, *act_delta;
6042   struct cost_pair *cp;
6043
6044   iv_ca_add_use (data, ivs, use);
6045   best_cost = iv_ca_cost (ivs);
6046   cp = iv_ca_cand_for_use (ivs, use);
6047   if (cp)
6048     {
6049       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6050       iv_ca_set_no_cp (data, ivs, use);
6051     }
6052
6053   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6054      first try important candidates not based on any memory object.  Only if
6055      this fails, try the specific ones.  Rationale -- in loops with many
6056      variables the best choice often is to use just one generic biv.  If we
6057      added here many ivs specific to the uses, the optimization algorithm later
6058      would be likely to get stuck in a local minimum, thus causing us to create
6059      too many ivs.  The approach from few ivs to more seems more likely to be
6060      successful -- starting from few ivs, replacing an expensive use by a
6061      specific iv should always be a win.  */
6062   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6063     {
6064       cand = iv_cand (data, i);
6065
6066       if (originalp && cand->pos !=IP_ORIGINAL)
6067         continue;
6068
6069       if (!originalp && cand->iv->base_object != NULL_TREE)
6070         continue;
6071
6072       if (iv_ca_cand_used_p (ivs, cand))
6073         continue;
6074
6075       cp = get_use_iv_cost (data, use, cand);
6076       if (!cp)
6077         continue;
6078
6079       iv_ca_set_cp (data, ivs, use, cp);
6080       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6081                                true);
6082       iv_ca_set_no_cp (data, ivs, use);
6083       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6084
6085       if (compare_costs (act_cost, best_cost) < 0)
6086         {
6087           best_cost = act_cost;
6088
6089           iv_ca_delta_free (&best_delta);
6090           best_delta = act_delta;
6091         }
6092       else
6093         iv_ca_delta_free (&act_delta);
6094     }
6095
6096   if (infinite_cost_p (best_cost))
6097     {
6098       for (i = 0; i < use->n_map_members; i++)
6099         {
6100           cp = use->cost_map + i;
6101           cand = cp->cand;
6102           if (!cand)
6103             continue;
6104
6105           /* Already tried this.  */
6106           if (cand->important)
6107             {
6108               if (originalp && cand->pos == IP_ORIGINAL)
6109                 continue;
6110               if (!originalp && cand->iv->base_object == NULL_TREE)
6111                 continue;
6112             }
6113
6114           if (iv_ca_cand_used_p (ivs, cand))
6115             continue;
6116
6117           act_delta = NULL;
6118           iv_ca_set_cp (data, ivs, use, cp);
6119           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6120           iv_ca_set_no_cp (data, ivs, use);
6121           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6122                                        cp, act_delta);
6123
6124           if (compare_costs (act_cost, best_cost) < 0)
6125             {
6126               best_cost = act_cost;
6127
6128               if (best_delta)
6129                 iv_ca_delta_free (&best_delta);
6130               best_delta = act_delta;
6131             }
6132           else
6133             iv_ca_delta_free (&act_delta);
6134         }
6135     }
6136
6137   iv_ca_delta_commit (data, ivs, best_delta, true);
6138   iv_ca_delta_free (&best_delta);
6139
6140   return !infinite_cost_p (best_cost);
6141 }
6142
6143 /* Finds an initial assignment of candidates to uses.  */
6144
6145 static struct iv_ca *
6146 get_initial_solution (struct ivopts_data *data, bool originalp)
6147 {
6148   struct iv_ca *ivs = iv_ca_new (data);
6149   unsigned i;
6150
6151   for (i = 0; i < n_iv_uses (data); i++)
6152     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6153       {
6154         iv_ca_free (&ivs);
6155         return NULL;
6156       }
6157
6158   return ivs;
6159 }
6160
6161 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6162    points to a bool variable, this function tries to break local
6163    optimal fixed-point by replacing candidates in IVS if it's true.  */
6164
6165 static bool
6166 try_improve_iv_set (struct ivopts_data *data,
6167                     struct iv_ca *ivs, bool *try_replace_p)
6168 {
6169   unsigned i, n_ivs;
6170   comp_cost acost, best_cost = iv_ca_cost (ivs);
6171   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6172   struct iv_cand *cand;
6173
6174   /* Try extending the set of induction variables by one.  */
6175   for (i = 0; i < n_iv_cands (data); i++)
6176     {
6177       cand = iv_cand (data, i);
6178
6179       if (iv_ca_cand_used_p (ivs, cand))
6180         continue;
6181
6182       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6183       if (!act_delta)
6184         continue;
6185
6186       /* If we successfully added the candidate and the set is small enough,
6187          try optimizing it by removing other candidates.  */
6188       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6189         {
6190           iv_ca_delta_commit (data, ivs, act_delta, true);
6191           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6192           iv_ca_delta_commit (data, ivs, act_delta, false);
6193           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6194         }
6195
6196       if (compare_costs (acost, best_cost) < 0)
6197         {
6198           best_cost = acost;
6199           iv_ca_delta_free (&best_delta);
6200           best_delta = act_delta;
6201         }
6202       else
6203         iv_ca_delta_free (&act_delta);
6204     }
6205
6206   if (!best_delta)
6207     {
6208       /* Try removing the candidates from the set instead.  */
6209       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6210
6211       if (!best_delta && *try_replace_p)
6212         {
6213           *try_replace_p = false;
6214           /* So far candidate selecting algorithm tends to choose fewer IVs
6215              so that it can handle cases in which loops have many variables
6216              but the best choice is often to use only one general biv.  One
6217              weakness is it can't handle opposite cases, in which different
6218              candidates should be chosen with respect to each use.  To solve
6219              the problem, we replace candidates in a manner described by the
6220              comments of iv_ca_replace, thus give general algorithm a chance
6221              to break local optimal fixed-point in these cases.  */
6222           best_cost = iv_ca_replace (data, ivs, &best_delta);
6223         }
6224
6225       if (!best_delta)
6226         return false;
6227     }
6228
6229   iv_ca_delta_commit (data, ivs, best_delta, true);
6230   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6231   iv_ca_delta_free (&best_delta);
6232   return true;
6233 }
6234
6235 /* Attempts to find the optimal set of induction variables.  We do simple
6236    greedy heuristic -- we try to replace at most one candidate in the selected
6237    solution and remove the unused ivs while this improves the cost.  */
6238
6239 static struct iv_ca *
6240 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6241 {
6242   struct iv_ca *set;
6243   bool try_replace_p = true;
6244
6245   /* Get the initial solution.  */
6246   set = get_initial_solution (data, originalp);
6247   if (!set)
6248     {
6249       if (dump_file && (dump_flags & TDF_DETAILS))
6250         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6251       return NULL;
6252     }
6253
6254   if (dump_file && (dump_flags & TDF_DETAILS))
6255     {
6256       fprintf (dump_file, "Initial set of candidates:\n");
6257       iv_ca_dump (data, dump_file, set);
6258     }
6259
6260   while (try_improve_iv_set (data, set, &try_replace_p))
6261     {
6262       if (dump_file && (dump_flags & TDF_DETAILS))
6263         {
6264           fprintf (dump_file, "Improved to:\n");
6265           iv_ca_dump (data, dump_file, set);
6266         }
6267     }
6268
6269   return set;
6270 }
6271
6272 static struct iv_ca *
6273 find_optimal_iv_set (struct ivopts_data *data)
6274 {
6275   unsigned i;
6276   struct iv_ca *set, *origset;
6277   struct iv_use *use;
6278   comp_cost cost, origcost;
6279
6280   /* Determine the cost based on a strategy that starts with original IVs,
6281      and try again using a strategy that prefers candidates not based
6282      on any IVs.  */
6283   origset = find_optimal_iv_set_1 (data, true);
6284   set = find_optimal_iv_set_1 (data, false);
6285
6286   if (!origset && !set)
6287     return NULL;
6288
6289   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6290   cost = set ? iv_ca_cost (set) : infinite_cost;
6291
6292   if (dump_file && (dump_flags & TDF_DETAILS))
6293     {
6294       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6295                origcost.cost, origcost.complexity);
6296       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6297                cost.cost, cost.complexity);
6298     }
6299
6300   /* Choose the one with the best cost.  */
6301   if (compare_costs (origcost, cost) <= 0)
6302     {
6303       if (set)
6304         iv_ca_free (&set);
6305       set = origset;
6306     }
6307   else if (origset)
6308     iv_ca_free (&origset);
6309
6310   for (i = 0; i < n_iv_uses (data); i++)
6311     {
6312       use = iv_use (data, i);
6313       use->selected = iv_ca_cand_for_use (set, use)->cand;
6314     }
6315
6316   return set;
6317 }
6318
6319 /* Creates a new induction variable corresponding to CAND.  */
6320
6321 static void
6322 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6323 {
6324   gimple_stmt_iterator incr_pos;
6325   tree base;
6326   bool after = false;
6327
6328   if (!cand->iv)
6329     return;
6330
6331   switch (cand->pos)
6332     {
6333     case IP_NORMAL:
6334       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6335       break;
6336
6337     case IP_END:
6338       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6339       after = true;
6340       break;
6341
6342     case IP_AFTER_USE:
6343       after = true;
6344       /* fall through */
6345     case IP_BEFORE_USE:
6346       incr_pos = gsi_for_stmt (cand->incremented_at);
6347       break;
6348
6349     case IP_ORIGINAL:
6350       /* Mark that the iv is preserved.  */
6351       name_info (data, cand->var_before)->preserve_biv = true;
6352       name_info (data, cand->var_after)->preserve_biv = true;
6353
6354       /* Rewrite the increment so that it uses var_before directly.  */
6355       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6356       return;
6357     }
6358
6359   gimple_add_tmp_var (cand->var_before);
6360
6361   base = unshare_expr (cand->iv->base);
6362
6363   create_iv (base, unshare_expr (cand->iv->step),
6364              cand->var_before, data->current_loop,
6365              &incr_pos, after, &cand->var_before, &cand->var_after);
6366 }
6367
6368 /* Creates new induction variables described in SET.  */
6369
6370 static void
6371 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6372 {
6373   unsigned i;
6374   struct iv_cand *cand;
6375   bitmap_iterator bi;
6376
6377   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6378     {
6379       cand = iv_cand (data, i);
6380       create_new_iv (data, cand);
6381     }
6382
6383   if (dump_file && (dump_flags & TDF_DETAILS))
6384     {
6385       fprintf (dump_file, "Selected IV set for loop %d",
6386                data->current_loop->num);
6387       if (data->loop_loc != UNKNOWN_LOCATION)
6388         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6389                  LOCATION_LINE (data->loop_loc));
6390       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6391       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6392         {
6393           cand = iv_cand (data, i);
6394           dump_cand (dump_file, cand);
6395         }
6396       fprintf (dump_file, "\n");
6397     }
6398 }
6399
6400 /* Rewrites USE (definition of iv used in a nonlinear expression)
6401    using candidate CAND.  */
6402
6403 static void
6404 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6405                             struct iv_use *use, struct iv_cand *cand)
6406 {
6407   tree comp;
6408   tree op, tgt;
6409   gassign *ass;
6410   gimple_stmt_iterator bsi;
6411
6412   /* An important special case -- if we are asked to express value of
6413      the original iv by itself, just exit; there is no need to
6414      introduce a new computation (that might also need casting the
6415      variable to unsigned and back).  */
6416   if (cand->pos == IP_ORIGINAL
6417       && cand->incremented_at == use->stmt)
6418     {
6419       enum tree_code stmt_code;
6420
6421       gcc_assert (is_gimple_assign (use->stmt));
6422       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6423
6424       /* Check whether we may leave the computation unchanged.
6425          This is the case only if it does not rely on other
6426          computations in the loop -- otherwise, the computation
6427          we rely upon may be removed in remove_unused_ivs,
6428          thus leading to ICE.  */
6429       stmt_code = gimple_assign_rhs_code (use->stmt);
6430       if (stmt_code == PLUS_EXPR
6431           || stmt_code == MINUS_EXPR
6432           || stmt_code == POINTER_PLUS_EXPR)
6433         {
6434           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6435             op = gimple_assign_rhs2 (use->stmt);
6436           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6437             op = gimple_assign_rhs1 (use->stmt);
6438           else
6439             op = NULL_TREE;
6440         }
6441       else
6442         op = NULL_TREE;
6443
6444       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6445         return;
6446     }
6447
6448   comp = get_computation (data->current_loop, use, cand);
6449   gcc_assert (comp != NULL_TREE);
6450
6451   switch (gimple_code (use->stmt))
6452     {
6453     case GIMPLE_PHI:
6454       tgt = PHI_RESULT (use->stmt);
6455
6456       /* If we should keep the biv, do not replace it.  */
6457       if (name_info (data, tgt)->preserve_biv)
6458         return;
6459
6460       bsi = gsi_after_labels (gimple_bb (use->stmt));
6461       break;
6462
6463     case GIMPLE_ASSIGN:
6464       tgt = gimple_assign_lhs (use->stmt);
6465       bsi = gsi_for_stmt (use->stmt);
6466       break;
6467
6468     default:
6469       gcc_unreachable ();
6470     }
6471
6472   if (!valid_gimple_rhs_p (comp)
6473       || (gimple_code (use->stmt) != GIMPLE_PHI
6474           /* We can't allow re-allocating the stmt as it might be pointed
6475              to still.  */
6476           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6477               >= gimple_num_ops (gsi_stmt (bsi)))))
6478     {
6479       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6480                                        true, GSI_SAME_STMT);
6481       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6482         {
6483           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6484           /* As this isn't a plain copy we have to reset alignment
6485              information.  */
6486           if (SSA_NAME_PTR_INFO (comp))
6487             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6488         }
6489     }
6490
6491   if (gimple_code (use->stmt) == GIMPLE_PHI)
6492     {
6493       ass = gimple_build_assign (tgt, comp);
6494       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6495
6496       bsi = gsi_for_stmt (use->stmt);
6497       remove_phi_node (&bsi, false);
6498     }
6499   else
6500     {
6501       gimple_assign_set_rhs_from_tree (&bsi, comp);
6502       use->stmt = gsi_stmt (bsi);
6503     }
6504 }
6505
6506 /* Performs a peephole optimization to reorder the iv update statement with
6507    a mem ref to enable instruction combining in later phases. The mem ref uses
6508    the iv value before the update, so the reordering transformation requires
6509    adjustment of the offset. CAND is the selected IV_CAND.
6510
6511    Example:
6512
6513    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6514    iv2 = iv1 + 1;
6515
6516    if (t < val)      (1)
6517      goto L;
6518    goto Head;
6519
6520
6521    directly propagating t over to (1) will introduce overlapping live range
6522    thus increase register pressure. This peephole transform it into:
6523
6524
6525    iv2 = iv1 + 1;
6526    t = MEM_REF (base, iv2, 8, 8);
6527    if (t < val)
6528      goto L;
6529    goto Head;
6530 */
6531
6532 static void
6533 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6534 {
6535   tree var_after;
6536   gimple iv_update, stmt;
6537   basic_block bb;
6538   gimple_stmt_iterator gsi, gsi_iv;
6539
6540   if (cand->pos != IP_NORMAL)
6541     return;
6542
6543   var_after = cand->var_after;
6544   iv_update = SSA_NAME_DEF_STMT (var_after);
6545
6546   bb = gimple_bb (iv_update);
6547   gsi = gsi_last_nondebug_bb (bb);
6548   stmt = gsi_stmt (gsi);
6549
6550   /* Only handle conditional statement for now.  */
6551   if (gimple_code (stmt) != GIMPLE_COND)
6552     return;
6553
6554   gsi_prev_nondebug (&gsi);
6555   stmt = gsi_stmt (gsi);
6556   if (stmt != iv_update)
6557     return;
6558
6559   gsi_prev_nondebug (&gsi);
6560   if (gsi_end_p (gsi))
6561     return;
6562
6563   stmt = gsi_stmt (gsi);
6564   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6565     return;
6566
6567   if (stmt != use->stmt)
6568     return;
6569
6570   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6571     return;
6572
6573   if (dump_file && (dump_flags & TDF_DETAILS))
6574     {
6575       fprintf (dump_file, "Reordering \n");
6576       print_gimple_stmt (dump_file, iv_update, 0, 0);
6577       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6578       fprintf (dump_file, "\n");
6579     }
6580
6581   gsi = gsi_for_stmt (use->stmt);
6582   gsi_iv = gsi_for_stmt (iv_update);
6583   gsi_move_before (&gsi_iv, &gsi);
6584
6585   cand->pos = IP_BEFORE_USE;
6586   cand->incremented_at = use->stmt;
6587 }
6588
6589 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6590
6591 static void
6592 rewrite_use_address (struct ivopts_data *data,
6593                      struct iv_use *use, struct iv_cand *cand)
6594 {
6595   aff_tree aff;
6596   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6597   tree base_hint = NULL_TREE;
6598   tree ref, iv;
6599   bool ok;
6600
6601   adjust_iv_update_pos (cand, use);
6602   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6603   gcc_assert (ok);
6604   unshare_aff_combination (&aff);
6605
6606   /* To avoid undefined overflow problems, all IV candidates use unsigned
6607      integer types.  The drawback is that this makes it impossible for
6608      create_mem_ref to distinguish an IV that is based on a memory object
6609      from one that represents simply an offset.
6610
6611      To work around this problem, we pass a hint to create_mem_ref that
6612      indicates which variable (if any) in aff is an IV based on a memory
6613      object.  Note that we only consider the candidate.  If this is not
6614      based on an object, the base of the reference is in some subexpression
6615      of the use -- but these will use pointer types, so they are recognized
6616      by the create_mem_ref heuristics anyway.  */
6617   if (cand->iv->base_object)
6618     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6619
6620   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6621   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6622                         reference_alias_ptr_type (*use->op_p),
6623                         iv, base_hint, data->speed);
6624   copy_ref_info (ref, *use->op_p);
6625   *use->op_p = ref;
6626 }
6627
6628 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6629    candidate CAND.  */
6630
6631 static void
6632 rewrite_use_compare (struct ivopts_data *data,
6633                      struct iv_use *use, struct iv_cand *cand)
6634 {
6635   tree comp, *var_p, op, bound;
6636   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6637   enum tree_code compare;
6638   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6639   bool ok;
6640
6641   bound = cp->value;
6642   if (bound)
6643     {
6644       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6645       tree var_type = TREE_TYPE (var);
6646       gimple_seq stmts;
6647
6648       if (dump_file && (dump_flags & TDF_DETAILS))
6649         {
6650           fprintf (dump_file, "Replacing exit test: ");
6651           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6652         }
6653       compare = cp->comp;
6654       bound = unshare_expr (fold_convert (var_type, bound));
6655       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6656       if (stmts)
6657         gsi_insert_seq_on_edge_immediate (
6658                 loop_preheader_edge (data->current_loop),
6659                 stmts);
6660
6661       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6662       gimple_cond_set_lhs (cond_stmt, var);
6663       gimple_cond_set_code (cond_stmt, compare);
6664       gimple_cond_set_rhs (cond_stmt, op);
6665       return;
6666     }
6667
6668   /* The induction variable elimination failed; just express the original
6669      giv.  */
6670   comp = get_computation (data->current_loop, use, cand);
6671   gcc_assert (comp != NULL_TREE);
6672
6673   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6674   gcc_assert (ok);
6675
6676   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6677                                      true, GSI_SAME_STMT);
6678 }
6679
6680 /* Rewrites USE using candidate CAND.  */
6681
6682 static void
6683 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6684 {
6685   switch (use->type)
6686     {
6687       case USE_NONLINEAR_EXPR:
6688         rewrite_use_nonlinear_expr (data, use, cand);
6689         break;
6690
6691       case USE_ADDRESS:
6692         rewrite_use_address (data, use, cand);
6693         break;
6694
6695       case USE_COMPARE:
6696         rewrite_use_compare (data, use, cand);
6697         break;
6698
6699       default:
6700         gcc_unreachable ();
6701     }
6702
6703   update_stmt (use->stmt);
6704 }
6705
6706 /* Rewrite the uses using the selected induction variables.  */
6707
6708 static void
6709 rewrite_uses (struct ivopts_data *data)
6710 {
6711   unsigned i;
6712   struct iv_cand *cand;
6713   struct iv_use *use;
6714
6715   for (i = 0; i < n_iv_uses (data); i++)
6716     {
6717       use = iv_use (data, i);
6718       cand = use->selected;
6719       gcc_assert (cand);
6720
6721       rewrite_use (data, use, cand);
6722     }
6723 }
6724
6725 /* Removes the ivs that are not used after rewriting.  */
6726
6727 static void
6728 remove_unused_ivs (struct ivopts_data *data)
6729 {
6730   unsigned j;
6731   bitmap_iterator bi;
6732   bitmap toremove = BITMAP_ALLOC (NULL);
6733
6734   /* Figure out an order in which to release SSA DEFs so that we don't
6735      release something that we'd have to propagate into a debug stmt
6736      afterwards.  */
6737   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6738     {
6739       struct version_info *info;
6740
6741       info = ver_info (data, j);
6742       if (info->iv
6743           && !integer_zerop (info->iv->step)
6744           && !info->inv_id
6745           && !info->iv->have_use_for
6746           && !info->preserve_biv)
6747         {
6748           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6749
6750           tree def = info->iv->ssa_name;
6751
6752           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6753             {
6754               imm_use_iterator imm_iter;
6755               use_operand_p use_p;
6756               gimple stmt;
6757               int count = 0;
6758
6759               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6760                 {
6761                   if (!gimple_debug_bind_p (stmt))
6762                     continue;
6763
6764                   /* We just want to determine whether to do nothing
6765                      (count == 0), to substitute the computed
6766                      expression into a single use of the SSA DEF by
6767                      itself (count == 1), or to use a debug temp
6768                      because the SSA DEF is used multiple times or as
6769                      part of a larger expression (count > 1). */
6770                   count++;
6771                   if (gimple_debug_bind_get_value (stmt) != def)
6772                     count++;
6773
6774                   if (count > 1)
6775                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6776                 }
6777
6778               if (!count)
6779                 continue;
6780
6781               struct iv_use dummy_use;
6782               struct iv_cand *best_cand = NULL, *cand;
6783               unsigned i, best_pref = 0, cand_pref;
6784
6785               memset (&dummy_use, 0, sizeof (dummy_use));
6786               dummy_use.iv = info->iv;
6787               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6788                 {
6789                   cand = iv_use (data, i)->selected;
6790                   if (cand == best_cand)
6791                     continue;
6792                   cand_pref = operand_equal_p (cand->iv->step,
6793                                                info->iv->step, 0)
6794                     ? 4 : 0;
6795                   cand_pref
6796                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6797                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6798                     ? 2 : 0;
6799                   cand_pref
6800                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6801                     ? 1 : 0;
6802                   if (best_cand == NULL || best_pref < cand_pref)
6803                     {
6804                       best_cand = cand;
6805                       best_pref = cand_pref;
6806                     }
6807                 }
6808
6809               if (!best_cand)
6810                 continue;
6811
6812               tree comp = get_computation_at (data->current_loop,
6813                                               &dummy_use, best_cand,
6814                                               SSA_NAME_DEF_STMT (def));
6815               if (!comp)
6816                 continue;
6817
6818               if (count > 1)
6819                 {
6820                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6821                   DECL_ARTIFICIAL (vexpr) = 1;
6822                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6823                   if (SSA_NAME_VAR (def))
6824                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6825                   else
6826                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6827                   gdebug *def_temp
6828                     = gimple_build_debug_bind (vexpr, comp, NULL);
6829                   gimple_stmt_iterator gsi;
6830
6831                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6832                     gsi = gsi_after_labels (gimple_bb
6833                                             (SSA_NAME_DEF_STMT (def)));
6834                   else
6835                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6836
6837                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6838                   comp = vexpr;
6839                 }
6840
6841               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6842                 {
6843                   if (!gimple_debug_bind_p (stmt))
6844                     continue;
6845
6846                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6847                     SET_USE (use_p, comp);
6848
6849                   update_stmt (stmt);
6850                 }
6851             }
6852         }
6853     }
6854
6855   release_defs_bitset (toremove);
6856
6857   BITMAP_FREE (toremove);
6858 }
6859
6860 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6861    for hash_map::traverse.  */
6862
6863 bool
6864 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6865 {
6866   free (value);
6867   return true;
6868 }
6869
6870 /* Frees data allocated by the optimization of a single loop.  */
6871
6872 static void
6873 free_loop_data (struct ivopts_data *data)
6874 {
6875   unsigned i, j;
6876   bitmap_iterator bi;
6877   tree obj;
6878
6879   if (data->niters)
6880     {
6881       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6882       delete data->niters;
6883       data->niters = NULL;
6884     }
6885
6886   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6887     {
6888       struct version_info *info;
6889
6890       info = ver_info (data, i);
6891       free (info->iv);
6892       info->iv = NULL;
6893       info->has_nonlin_use = false;
6894       info->preserve_biv = false;
6895       info->inv_id = 0;
6896     }
6897   bitmap_clear (data->relevant);
6898   bitmap_clear (data->important_candidates);
6899
6900   for (i = 0; i < n_iv_uses (data); i++)
6901     {
6902       struct iv_use *use = iv_use (data, i);
6903
6904       free (use->iv);
6905       BITMAP_FREE (use->related_cands);
6906       for (j = 0; j < use->n_map_members; j++)
6907         if (use->cost_map[j].depends_on)
6908           BITMAP_FREE (use->cost_map[j].depends_on);
6909       free (use->cost_map);
6910       free (use);
6911     }
6912   data->iv_uses.truncate (0);
6913
6914   for (i = 0; i < n_iv_cands (data); i++)
6915     {
6916       struct iv_cand *cand = iv_cand (data, i);
6917
6918       free (cand->iv);
6919       if (cand->depends_on)
6920         BITMAP_FREE (cand->depends_on);
6921       free (cand);
6922     }
6923   data->iv_candidates.truncate (0);
6924
6925   if (data->version_info_size < num_ssa_names)
6926     {
6927       data->version_info_size = 2 * num_ssa_names;
6928       free (data->version_info);
6929       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6930     }
6931
6932   data->max_inv_id = 0;
6933
6934   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6935     SET_DECL_RTL (obj, NULL_RTX);
6936
6937   decl_rtl_to_reset.truncate (0);
6938
6939   data->inv_expr_tab->empty ();
6940   data->inv_expr_id = 0;
6941 }
6942
6943 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6944    loop tree.  */
6945
6946 static void
6947 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6948 {
6949   free_loop_data (data);
6950   free (data->version_info);
6951   BITMAP_FREE (data->relevant);
6952   BITMAP_FREE (data->important_candidates);
6953
6954   decl_rtl_to_reset.release ();
6955   data->iv_uses.release ();
6956   data->iv_candidates.release ();
6957   delete data->inv_expr_tab;
6958   data->inv_expr_tab = NULL;
6959   free_affine_expand_cache (&data->name_expansion_cache);
6960 }
6961
6962 /* Returns true if the loop body BODY includes any function calls.  */
6963
6964 static bool
6965 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6966 {
6967   gimple_stmt_iterator gsi;
6968   unsigned i;
6969
6970   for (i = 0; i < num_nodes; i++)
6971     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6972       {
6973         gimple stmt = gsi_stmt (gsi);
6974         if (is_gimple_call (stmt)
6975             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6976           return true;
6977       }
6978   return false;
6979 }
6980
6981 /* Optimizes the LOOP.  Returns true if anything changed.  */
6982
6983 static bool
6984 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6985 {
6986   bool changed = false;
6987   struct iv_ca *iv_ca;
6988   edge exit = single_dom_exit (loop);
6989   basic_block *body;
6990
6991   gcc_assert (!data->niters);
6992   data->current_loop = loop;
6993   data->loop_loc = find_loop_location (loop);
6994   data->speed = optimize_loop_for_speed_p (loop);
6995
6996   if (dump_file && (dump_flags & TDF_DETAILS))
6997     {
6998       fprintf (dump_file, "Processing loop %d", loop->num);
6999       if (data->loop_loc != UNKNOWN_LOCATION)
7000         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7001                  LOCATION_LINE (data->loop_loc));
7002       fprintf (dump_file, "\n");
7003
7004       if (exit)
7005         {
7006           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7007                    exit->src->index, exit->dest->index);
7008           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7009           fprintf (dump_file, "\n");
7010         }
7011
7012       fprintf (dump_file, "\n");
7013     }
7014
7015   body = get_loop_body (loop);
7016   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7017   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7018   free (body);
7019
7020   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7021
7022   /* For each ssa name determines whether it behaves as an induction variable
7023      in some loop.  */
7024   if (!find_induction_variables (data))
7025     goto finish;
7026
7027   /* Finds interesting uses (item 1).  */
7028   find_interesting_uses (data);
7029   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7030     goto finish;
7031
7032   /* Finds candidates for the induction variables (item 2).  */
7033   find_iv_candidates (data);
7034
7035   /* Calculates the costs (item 3, part 1).  */
7036   determine_iv_costs (data);
7037   determine_use_iv_costs (data);
7038   determine_set_costs (data);
7039
7040   /* Find the optimal set of induction variables (item 3, part 2).  */
7041   iv_ca = find_optimal_iv_set (data);
7042   if (!iv_ca)
7043     goto finish;
7044   changed = true;
7045
7046   /* Create the new induction variables (item 4, part 1).  */
7047   create_new_ivs (data, iv_ca);
7048   iv_ca_free (&iv_ca);
7049
7050   /* Rewrite the uses (item 4, part 2).  */
7051   rewrite_uses (data);
7052
7053   /* Remove the ivs that are unused after rewriting.  */
7054   remove_unused_ivs (data);
7055
7056   /* We have changed the structure of induction variables; it might happen
7057      that definitions in the scev database refer to some of them that were
7058      eliminated.  */
7059   scev_reset ();
7060
7061 finish:
7062   free_loop_data (data);
7063
7064   return changed;
7065 }
7066
7067 /* Main entry point.  Optimizes induction variables in loops.  */
7068
7069 void
7070 tree_ssa_iv_optimize (void)
7071 {
7072   struct loop *loop;
7073   struct ivopts_data data;
7074
7075   tree_ssa_iv_optimize_init (&data);
7076
7077   /* Optimize the loops starting with the innermost ones.  */
7078   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7079     {
7080       if (dump_file && (dump_flags & TDF_DETAILS))
7081         flow_loop_dump (loop, dump_file, NULL, 1);
7082
7083       tree_ssa_iv_optimize_loop (&data, loop);
7084     }
7085
7086   tree_ssa_iv_optimize_finalize (&data);
7087 }