gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "stor-layout.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "pointer-set.h"
  74 #include "hash-table.h"
  75 #include "tree-ssa-alias.h"
  76 #include "internal-fn.h"
  77 #include "tree-eh.h"
  78 #include "gimple-expr.h"
  79 #include "is-a.h"
  80 #include "gimple.h"
  81 #include "gimplify.h"
  82 #include "gimple-iterator.h"
  83 #include "gimplify-me.h"
  84 #include "gimple-ssa.h"
  85 #include "cgraph.h"
  86 #include "tree-cfg.h"
  87 #include "tree-phinodes.h"
  88 #include "ssa-iterators.h"
  89 #include "stringpool.h"
  90 #include "tree-ssanames.h"
  91 #include "tree-ssa-loop-ivopts.h"
  92 #include "tree-ssa-loop-manip.h"
  93 #include "tree-ssa-loop-niter.h"
  94 #include "tree-ssa-loop.h"
  95 #include "expr.h"
  96 #include "tree-dfa.h"
  97 #include "tree-ssa.h"
  98 #include "cfgloop.h"
  99 #include "tree-pass.h"
 100 #include "insn-config.h"
 101 #include "tree-chrec.h"
 102 #include "tree-scalar-evolution.h"
 103 #include "cfgloop.h"
 104 #include "params.h"
 105 #include "langhooks.h"
 106 #include "tree-affine.h"
 107 #include "target.h"
 108 #include "tree-inline.h"
 109 #include "tree-ssa-propagate.h"
 110 #include "expmed.h"
 111 #include "tree-ssa-address.h"
 112
 113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 114    cost of different addressing modes.  This should be moved to a TBD
 115    interface between the GIMPLE and RTL worlds.  */
 116 #include "expr.h"
 117 #include "recog.h"
 118
 119 /* The infinite cost.  */
 120 #define INFTY 10000000
 121
 122 #define AVG_LOOP_NITER(LOOP) 5
 123
 124 /* Returns the expected number of loop iterations for LOOP.
 125    The average trip count is computed from profile data if it
 126    exists. */
 127
 128 static inline HOST_WIDE_INT
 129 avg_loop_niter (struct loop *loop)
 130 {
 131   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 132   if (niter == -1)
 133     return AVG_LOOP_NITER (loop);
 134
 135   return niter;
 136 }
 137
 138 /* Representation of the induction variable.  */
 139 struct iv
 140 {
 141   tree base;            /* Initial value of the iv.  */
 142   tree base_object;     /* A memory object to that the induction variable points.  */
 143   tree step;            /* Step of the iv (constant only).  */
 144   tree ssa_name;        /* The ssa name with the value.  */
 145   bool biv_p;           /* Is it a biv?  */
 146   bool have_use_for;    /* Do we already have a use for it?  */
 147   unsigned use_id;      /* The identifier in the use if it is the case.  */
 148 };
 149
 150 /* Per-ssa version information (induction variable descriptions, etc.).  */
 151 struct version_info
 152 {
 153   tree name;            /* The ssa name.  */
 154   struct iv *iv;        /* Induction variable description.  */
 155   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 156                            an expression that is not an induction variable.  */
 157   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 158   unsigned inv_id;      /* Id of an invariant.  */
 159 };
 160
 161 /* Types of uses.  */
 162 enum use_type
 163 {
 164   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 165   USE_ADDRESS,          /* Use in an address.  */
 166   USE_COMPARE           /* Use is a compare.  */
 167 };
 168
 169 /* Cost of a computation.  */
 170 typedef struct
 171 {
 172   int cost;             /* The runtime cost.  */
 173   unsigned complexity;  /* The estimate of the complexity of the code for
 174                            the computation (in no concrete units --
 175                            complexity field should be larger for more
 176                            complex expressions and addressing modes).  */
 177 } comp_cost;
 178
 179 static const comp_cost no_cost = {0, 0};
 180 static const comp_cost infinite_cost = {INFTY, INFTY};
 181
 182 /* The candidate - cost pair.  */
 183 struct cost_pair
 184 {
 185   struct iv_cand *cand; /* The candidate.  */
 186   comp_cost cost;       /* The cost.  */
 187   bitmap depends_on;    /* The list of invariants that have to be
 188                            preserved.  */
 189   tree value;           /* For final value elimination, the expression for
 190                            the final value of the iv.  For iv elimination,
 191                            the new bound to compare with.  */
 192   enum tree_code comp;  /* For iv elimination, the comparison.  */
 193   int inv_expr_id;      /* Loop invariant expression id.  */
 194 };
 195
 196 /* Use.  */
 197 struct iv_use
 198 {
 199   unsigned id;          /* The id of the use.  */
 200   enum use_type type;   /* Type of the use.  */
 201   struct iv *iv;        /* The induction variable it is based on.  */
 202   gimple stmt;          /* Statement in that it occurs.  */
 203   tree *op_p;           /* The place where it occurs.  */
 204   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 205                            important ones.  */
 206
 207   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 208   struct cost_pair *cost_map;
 209                         /* The costs wrto the iv candidates.  */
 210
 211   struct iv_cand *selected;
 212                         /* The selected candidate.  */
 213 };
 214
 215 /* The position where the iv is computed.  */
 216 enum iv_position
 217 {
 218   IP_NORMAL,            /* At the end, just before the exit condition.  */
 219   IP_END,               /* At the end of the latch block.  */
 220   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 221   IP_AFTER_USE,         /* Immediately after a specific use.  */
 222   IP_ORIGINAL           /* The original biv.  */
 223 };
 224
 225 /* The induction variable candidate.  */
 226 struct iv_cand
 227 {
 228   unsigned id;          /* The number of the candidate.  */
 229   bool important;       /* Whether this is an "important" candidate, i.e. such
 230                            that it should be considered by all uses.  */
 231   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 232   gimple incremented_at;/* For original biv, the statement where it is
 233                            incremented.  */
 234   tree var_before;      /* The variable used for it before increment.  */
 235   tree var_after;       /* The variable used for it after increment.  */
 236   struct iv *iv;        /* The value of the candidate.  NULL for
 237                            "pseudocandidate" used to indicate the possibility
 238                            to replace the final value of an iv by direct
 239                            computation of the value.  */
 240   unsigned cost;        /* Cost of the candidate.  */
 241   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 242   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 243                               where it is incremented.  */
 244   bitmap depends_on;    /* The list of invariants that are used in step of the
 245                            biv.  */
 246 };
 247
 248 /* Loop invariant expression hashtable entry.  */
 249 struct iv_inv_expr_ent
 250 {
 251   tree expr;
 252   int id;
 253   hashval_t hash;
 254 };
 255
 256 /* The data used by the induction variable optimizations.  */
 257
 258 typedef struct iv_use *iv_use_p;
 259
 260 typedef struct iv_cand *iv_cand_p;
 261
 262 /* Hashtable helpers.  */
 263
 264 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 265 {
 266   typedef iv_inv_expr_ent value_type;
 267   typedef iv_inv_expr_ent compare_type;
 268   static inline hashval_t hash (const value_type *);
 269   static inline bool equal (const value_type *, const compare_type *);
 270 };
 271
 272 /* Hash function for loop invariant expressions.  */
 273
 274 inline hashval_t
 275 iv_inv_expr_hasher::hash (const value_type *expr)
 276 {
 277   return expr->hash;
 278 }
 279
 280 /* Hash table equality function for expressions.  */
 281
 282 inline bool
 283 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 284 {
 285   return expr1->hash == expr2->hash
 286          && operand_equal_p (expr1->expr, expr2->expr, 0);
 287 }
 288
 289 struct ivopts_data
 290 {
 291   /* The currently optimized loop.  */
 292   struct loop *current_loop;
 293
 294   /* Numbers of iterations for all exits of the current loop.  */
 295   struct pointer_map_t *niters;
 296
 297   /* Number of registers used in it.  */
 298   unsigned regs_used;
 299
 300   /* The size of version_info array allocated.  */
 301   unsigned version_info_size;
 302
 303   /* The array of information for the ssa names.  */
 304   struct version_info *version_info;
 305
 306   /* The hashtable of loop invariant expressions created
 307      by ivopt.  */
 308   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 309
 310   /* Loop invariant expression id.  */
 311   int inv_expr_id;
 312
 313   /* The bitmap of indices in version_info whose value was changed.  */
 314   bitmap relevant;
 315
 316   /* The uses of induction variables.  */
 317   vec<iv_use_p> iv_uses;
 318
 319   /* The candidates.  */
 320   vec<iv_cand_p> iv_candidates;
 321
 322   /* A bitmap of important candidates.  */
 323   bitmap important_candidates;
 324
 325   /* The maximum invariant id.  */
 326   unsigned max_inv_id;
 327
 328   /* Whether to consider just related and important candidates when replacing a
 329      use.  */
 330   bool consider_all_candidates;
 331
 332   /* Are we optimizing for speed?  */
 333   bool speed;
 334
 335   /* Whether the loop body includes any function calls.  */
 336   bool body_includes_call;
 337
 338   /* Whether the loop body can only be exited via single exit.  */
 339   bool loop_single_exit_p;
 340 };
 341
 342 /* An assignment of iv candidates to uses.  */
 343
 344 struct iv_ca
 345 {
 346   /* The number of uses covered by the assignment.  */
 347   unsigned upto;
 348
 349   /* Number of uses that cannot be expressed by the candidates in the set.  */
 350   unsigned bad_uses;
 351
 352   /* Candidate assigned to a use, together with the related costs.  */
 353   struct cost_pair **cand_for_use;
 354
 355   /* Number of times each candidate is used.  */
 356   unsigned *n_cand_uses;
 357
 358   /* The candidates used.  */
 359   bitmap cands;
 360
 361   /* The number of candidates in the set.  */
 362   unsigned n_cands;
 363
 364   /* Total number of registers needed.  */
 365   unsigned n_regs;
 366
 367   /* Total cost of expressing uses.  */
 368   comp_cost cand_use_cost;
 369
 370   /* Total cost of candidates.  */
 371   unsigned cand_cost;
 372
 373   /* Number of times each invariant is used.  */
 374   unsigned *n_invariant_uses;
 375
 376   /* The array holding the number of uses of each loop
 377      invariant expressions created by ivopt.  */
 378   unsigned *used_inv_expr;
 379
 380   /* The number of created loop invariants.  */
 381   unsigned num_used_inv_expr;
 382
 383   /* Total cost of the assignment.  */
 384   comp_cost cost;
 385 };
 386
 387 /* Difference of two iv candidate assignments.  */
 388
 389 struct iv_ca_delta
 390 {
 391   /* Changed use.  */
 392   struct iv_use *use;
 393
 394   /* An old assignment (for rollback purposes).  */
 395   struct cost_pair *old_cp;
 396
 397   /* A new assignment.  */
 398   struct cost_pair *new_cp;
 399
 400   /* Next change in the list.  */
 401   struct iv_ca_delta *next_change;
 402 };
 403
 404 /* Bound on number of candidates below that all candidates are considered.  */
 405
 406 #define CONSIDER_ALL_CANDIDATES_BOUND \
 407   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 408
 409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 410    optimizing such a loop would help, and it would take ages).  */
 411
 412 #define MAX_CONSIDERED_USES \
 413   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 414
 415 /* If there are at most this number of ivs in the set, try removing unnecessary
 416    ivs from the set always.  */
 417
 418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 419   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 420
 421 /* The list of trees for that the decl_rtl field must be reset is stored
 422    here.  */
 423
 424 static vec<tree> decl_rtl_to_reset;
 425
 426 static comp_cost force_expr_to_var_cost (tree, bool);
 427
 428 /* Number of uses recorded in DATA.  */
 429
 430 static inline unsigned
 431 n_iv_uses (struct ivopts_data *data)
 432 {
 433   return data->iv_uses.length ();
 434 }
 435
 436 /* Ith use recorded in DATA.  */
 437
 438 static inline struct iv_use *
 439 iv_use (struct ivopts_data *data, unsigned i)
 440 {
 441   return data->iv_uses[i];
 442 }
 443
 444 /* Number of candidates recorded in DATA.  */
 445
 446 static inline unsigned
 447 n_iv_cands (struct ivopts_data *data)
 448 {
 449   return data->iv_candidates.length ();
 450 }
 451
 452 /* Ith candidate recorded in DATA.  */
 453
 454 static inline struct iv_cand *
 455 iv_cand (struct ivopts_data *data, unsigned i)
 456 {
 457   return data->iv_candidates[i];
 458 }
 459
 460 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 461
 462 edge
 463 single_dom_exit (struct loop *loop)
 464 {
 465   edge exit = single_exit (loop);
 466
 467   if (!exit)
 468     return NULL;
 469
 470   if (!just_once_each_iteration_p (loop, exit->src))
 471     return NULL;
 472
 473   return exit;
 474 }
 475
 476 /* Dumps information about the induction variable IV to FILE.  */
 477
 478 void
 479 dump_iv (FILE *file, struct iv *iv)
 480 {
 481   if (iv->ssa_name)
 482     {
 483       fprintf (file, "ssa name ");
 484       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 485       fprintf (file, "\n");
 486     }
 487
 488   fprintf (file, "  type ");
 489   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 490   fprintf (file, "\n");
 491
 492   if (iv->step)
 493     {
 494       fprintf (file, "  base ");
 495       print_generic_expr (file, iv->base, TDF_SLIM);
 496       fprintf (file, "\n");
 497
 498       fprintf (file, "  step ");
 499       print_generic_expr (file, iv->step, TDF_SLIM);
 500       fprintf (file, "\n");
 501     }
 502   else
 503     {
 504       fprintf (file, "  invariant ");
 505       print_generic_expr (file, iv->base, TDF_SLIM);
 506       fprintf (file, "\n");
 507     }
 508
 509   if (iv->base_object)
 510     {
 511       fprintf (file, "  base object ");
 512       print_generic_expr (file, iv->base_object, TDF_SLIM);
 513       fprintf (file, "\n");
 514     }
 515
 516   if (iv->biv_p)
 517     fprintf (file, "  is a biv\n");
 518 }
 519
 520 /* Dumps information about the USE to FILE.  */
 521
 522 void
 523 dump_use (FILE *file, struct iv_use *use)
 524 {
 525   fprintf (file, "use %d\n", use->id);
 526
 527   switch (use->type)
 528     {
 529     case USE_NONLINEAR_EXPR:
 530       fprintf (file, "  generic\n");
 531       break;
 532
 533     case USE_ADDRESS:
 534       fprintf (file, "  address\n");
 535       break;
 536
 537     case USE_COMPARE:
 538       fprintf (file, "  compare\n");
 539       break;
 540
 541     default:
 542       gcc_unreachable ();
 543     }
 544
 545   fprintf (file, "  in statement ");
 546   print_gimple_stmt (file, use->stmt, 0, 0);
 547   fprintf (file, "\n");
 548
 549   fprintf (file, "  at position ");
 550   if (use->op_p)
 551     print_generic_expr (file, *use->op_p, TDF_SLIM);
 552   fprintf (file, "\n");
 553
 554   dump_iv (file, use->iv);
 555
 556   if (use->related_cands)
 557     {
 558       fprintf (file, "  related candidates ");
 559       dump_bitmap (file, use->related_cands);
 560     }
 561 }
 562
 563 /* Dumps information about the uses to FILE.  */
 564
 565 void
 566 dump_uses (FILE *file, struct ivopts_data *data)
 567 {
 568   unsigned i;
 569   struct iv_use *use;
 570
 571   for (i = 0; i < n_iv_uses (data); i++)
 572     {
 573       use = iv_use (data, i);
 574
 575       dump_use (file, use);
 576       fprintf (file, "\n");
 577     }
 578 }
 579
 580 /* Dumps information about induction variable candidate CAND to FILE.  */
 581
 582 void
 583 dump_cand (FILE *file, struct iv_cand *cand)
 584 {
 585   struct iv *iv = cand->iv;
 586
 587   fprintf (file, "candidate %d%s\n",
 588            cand->id, cand->important ? " (important)" : "");
 589
 590   if (cand->depends_on)
 591     {
 592       fprintf (file, "  depends on ");
 593       dump_bitmap (file, cand->depends_on);
 594     }
 595
 596   if (!iv)
 597     {
 598       fprintf (file, "  final value replacement\n");
 599       return;
 600     }
 601
 602   if (cand->var_before)
 603     {
 604       fprintf (file, "  var_before ");
 605       print_generic_expr (file, cand->var_before, TDF_SLIM);
 606       fprintf (file, "\n");
 607     }
 608   if (cand->var_after)
 609     {
 610       fprintf (file, "  var_after ");
 611       print_generic_expr (file, cand->var_after, TDF_SLIM);
 612       fprintf (file, "\n");
 613     }
 614
 615   switch (cand->pos)
 616     {
 617     case IP_NORMAL:
 618       fprintf (file, "  incremented before exit test\n");
 619       break;
 620
 621     case IP_BEFORE_USE:
 622       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 623       break;
 624
 625     case IP_AFTER_USE:
 626       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 627       break;
 628
 629     case IP_END:
 630       fprintf (file, "  incremented at end\n");
 631       break;
 632
 633     case IP_ORIGINAL:
 634       fprintf (file, "  original biv\n");
 635       break;
 636     }
 637
 638   dump_iv (file, iv);
 639 }
 640
 641 /* Returns the info for ssa version VER.  */
 642
 643 static inline struct version_info *
 644 ver_info (struct ivopts_data *data, unsigned ver)
 645 {
 646   return data->version_info + ver;
 647 }
 648
 649 /* Returns the info for ssa name NAME.  */
 650
 651 static inline struct version_info *
 652 name_info (struct ivopts_data *data, tree name)
 653 {
 654   return ver_info (data, SSA_NAME_VERSION (name));
 655 }
 656
 657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 658    emitted in LOOP.  */
 659
 660 static bool
 661 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 662 {
 663   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 664
 665   gcc_assert (bb);
 666
 667   if (sbb == loop->latch)
 668     return true;
 669
 670   if (sbb != bb)
 671     return false;
 672
 673   return stmt == last_stmt (bb);
 674 }
 675
 676 /* Returns true if STMT if after the place where the original induction
 677    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 678    if the positions are identical.  */
 679
 680 static bool
 681 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 682 {
 683   basic_block cand_bb = gimple_bb (cand->incremented_at);
 684   basic_block stmt_bb = gimple_bb (stmt);
 685
 686   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 687     return false;
 688
 689   if (stmt_bb != cand_bb)
 690     return true;
 691
 692   if (true_if_equal
 693       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 694     return true;
 695   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 696 }
 697
 698 /* Returns true if STMT if after the place where the induction variable
 699    CAND is incremented in LOOP.  */
 700
 701 static bool
 702 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 703 {
 704   switch (cand->pos)
 705     {
 706     case IP_END:
 707       return false;
 708
 709     case IP_NORMAL:
 710       return stmt_after_ip_normal_pos (loop, stmt);
 711
 712     case IP_ORIGINAL:
 713     case IP_AFTER_USE:
 714       return stmt_after_inc_pos (cand, stmt, false);
 715
 716     case IP_BEFORE_USE:
 717       return stmt_after_inc_pos (cand, stmt, true);
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722 }
 723
 724 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 725
 726 static bool
 727 abnormal_ssa_name_p (tree exp)
 728 {
 729   if (!exp)
 730     return false;
 731
 732   if (TREE_CODE (exp) != SSA_NAME)
 733     return false;
 734
 735   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 736 }
 737
 738 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 739    abnormal phi node.  Callback for for_each_index.  */
 740
 741 static bool
 742 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 743                                   void *data ATTRIBUTE_UNUSED)
 744 {
 745   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 746     {
 747       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 748         return false;
 749       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 750         return false;
 751     }
 752
 753   return !abnormal_ssa_name_p (*index);
 754 }
 755
 756 /* Returns true if EXPR contains a ssa name that occurs in an
 757    abnormal phi node.  */
 758
 759 bool
 760 contains_abnormal_ssa_name_p (tree expr)
 761 {
 762   enum tree_code code;
 763   enum tree_code_class codeclass;
 764
 765   if (!expr)
 766     return false;
 767
 768   code = TREE_CODE (expr);
 769   codeclass = TREE_CODE_CLASS (code);
 770
 771   if (code == SSA_NAME)
 772     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 773
 774   if (code == INTEGER_CST
 775       || is_gimple_min_invariant (expr))
 776     return false;
 777
 778   if (code == ADDR_EXPR)
 779     return !for_each_index (&TREE_OPERAND (expr, 0),
 780                             idx_contains_abnormal_ssa_name_p,
 781                             NULL);
 782
 783   if (code == COND_EXPR)
 784     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 785       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 786       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 787
 788   switch (codeclass)
 789     {
 790     case tcc_binary:
 791     case tcc_comparison:
 792       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 793         return true;
 794
 795       /* Fallthru.  */
 796     case tcc_unary:
 797       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 798         return true;
 799
 800       break;
 801
 802     default:
 803       gcc_unreachable ();
 804     }
 805
 806   return false;
 807 }
 808
 809 /*  Returns the structure describing number of iterations determined from
 810     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 811
 812 static struct tree_niter_desc *
 813 niter_for_exit (struct ivopts_data *data, edge exit)
 814 {
 815   struct tree_niter_desc *desc;
 816   void **slot;
 817
 818   if (!data->niters)
 819     {
 820       data->niters = pointer_map_create ();
 821       slot = NULL;
 822     }
 823   else
 824     slot = pointer_map_contains (data->niters, exit);
 825
 826   if (!slot)
 827     {
 828       /* Try to determine number of iterations.  We cannot safely work with ssa
 829          names that appear in phi nodes on abnormal edges, so that we do not
 830          create overlapping life ranges for them (PR 27283).  */
 831       desc = XNEW (struct tree_niter_desc);
 832       if (!number_of_iterations_exit (data->current_loop,
 833                                       exit, desc, true)
 834           || contains_abnormal_ssa_name_p (desc->niter))
 835         {
 836           XDELETE (desc);
 837           desc = NULL;
 838         }
 839       slot = pointer_map_insert (data->niters, exit);
 840       *slot = desc;
 841     }
 842   else
 843     desc = (struct tree_niter_desc *) *slot;
 844
 845   return desc;
 846 }
 847
 848 /* Returns the structure describing number of iterations determined from
 849    single dominating exit of DATA->current_loop, or NULL if something
 850    goes wrong.  */
 851
 852 static struct tree_niter_desc *
 853 niter_for_single_dom_exit (struct ivopts_data *data)
 854 {
 855   edge exit = single_dom_exit (data->current_loop);
 856
 857   if (!exit)
 858     return NULL;
 859
 860   return niter_for_exit (data, exit);
 861 }
 862
 863 /* Initializes data structures used by the iv optimization pass, stored
 864    in DATA.  */
 865
 866 static void
 867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 868 {
 869   data->version_info_size = 2 * num_ssa_names;
 870   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 871   data->relevant = BITMAP_ALLOC (NULL);
 872   data->important_candidates = BITMAP_ALLOC (NULL);
 873   data->max_inv_id = 0;
 874   data->niters = NULL;
 875   data->iv_uses.create (20);
 876   data->iv_candidates.create (20);
 877   data->inv_expr_tab.create (10);
 878   data->inv_expr_id = 0;
 879   decl_rtl_to_reset.create (20);
 880 }
 881
 882 /* Returns a memory object to that EXPR points.  In case we are able to
 883    determine that it does not point to any such object, NULL is returned.  */
 884
 885 static tree
 886 determine_base_object (tree expr)
 887 {
 888   enum tree_code code = TREE_CODE (expr);
 889   tree base, obj;
 890
 891   /* If this is a pointer casted to any type, we need to determine
 892      the base object for the pointer; so handle conversions before
 893      throwing away non-pointer expressions.  */
 894   if (CONVERT_EXPR_P (expr))
 895     return determine_base_object (TREE_OPERAND (expr, 0));
 896
 897   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 898     return NULL_TREE;
 899
 900   switch (code)
 901     {
 902     case INTEGER_CST:
 903       return NULL_TREE;
 904
 905     case ADDR_EXPR:
 906       obj = TREE_OPERAND (expr, 0);
 907       base = get_base_address (obj);
 908
 909       if (!base)
 910         return expr;
 911
 912       if (TREE_CODE (base) == MEM_REF)
 913         return determine_base_object (TREE_OPERAND (base, 0));
 914
 915       return fold_convert (ptr_type_node,
 916                            build_fold_addr_expr (base));
 917
 918     case POINTER_PLUS_EXPR:
 919       return determine_base_object (TREE_OPERAND (expr, 0));
 920
 921     case PLUS_EXPR:
 922     case MINUS_EXPR:
 923       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 924       gcc_unreachable ();
 925
 926     default:
 927       return fold_convert (ptr_type_node, expr);
 928     }
 929 }
 930
 931 /* Allocates an induction variable with given initial value BASE and step STEP
 932    for loop LOOP.  */
 933
 934 static struct iv *
 935 alloc_iv (tree base, tree step)
 936 {
 937   tree base_object = base;
 938   struct iv *iv = XCNEW (struct iv);
 939   gcc_assert (step != NULL_TREE);
 940
 941   /* Lower all address expressions except ones with DECL_P as operand.
 942      By doing this:
 943        1) More accurate cost can be computed for address expressions;
 944        2) Duplicate candidates won't be created for bases in different
 945           forms, like &a[0] and &a.  */
 946   STRIP_NOPS (base_object);
 947   if (TREE_CODE (base_object) == ADDR_EXPR
 948       && !DECL_P (TREE_OPERAND (base_object, 0)))
 949     {
 950       aff_tree comb;
 951       double_int size;
 952       base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
 953                                              &comb, &size);
 954       gcc_assert (base_object != NULL_TREE);
 955       base_object = build_fold_addr_expr (base_object);
 956       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 957     }
 958
 959   iv->base = base;
 960   iv->base_object = determine_base_object (base_object);
 961   iv->step = step;
 962   iv->biv_p = false;
 963   iv->have_use_for = false;
 964   iv->use_id = 0;
 965   iv->ssa_name = NULL_TREE;
 966
 967   return iv;
 968 }
 969
 970 /* Sets STEP and BASE for induction variable IV.  */
 971
 972 static void
 973 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 974 {
 975   struct version_info *info = name_info (data, iv);
 976
 977   gcc_assert (!info->iv);
 978
 979   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 980   info->iv = alloc_iv (base, step);
 981   info->iv->ssa_name = iv;
 982 }
 983
 984 /* Finds induction variable declaration for VAR.  */
 985
 986 static struct iv *
 987 get_iv (struct ivopts_data *data, tree var)
 988 {
 989   basic_block bb;
 990   tree type = TREE_TYPE (var);
 991
 992   if (!POINTER_TYPE_P (type)
 993       && !INTEGRAL_TYPE_P (type))
 994     return NULL;
 995
 996   if (!name_info (data, var)->iv)
 997     {
 998       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 999
1000       if (!bb
1001           || !flow_bb_inside_loop_p (data->current_loop, bb))
1002         set_iv (data, var, var, build_int_cst (type, 0));
1003     }
1004
1005   return name_info (data, var)->iv;
1006 }
1007
1008 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1009    not define a simple affine biv with nonzero step.  */
1010
1011 static tree
1012 determine_biv_step (gimple phi)
1013 {
1014   struct loop *loop = gimple_bb (phi)->loop_father;
1015   tree name = PHI_RESULT (phi);
1016   affine_iv iv;
1017
1018   if (virtual_operand_p (name))
1019     return NULL_TREE;
1020
1021   if (!simple_iv (loop, loop, name, &iv, true))
1022     return NULL_TREE;
1023
1024   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1025 }
1026
1027 /* Finds basic ivs.  */
1028
1029 static bool
1030 find_bivs (struct ivopts_data *data)
1031 {
1032   gimple phi;
1033   tree step, type, base;
1034   bool found = false;
1035   struct loop *loop = data->current_loop;
1036   gimple_stmt_iterator psi;
1037
1038   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1039     {
1040       phi = gsi_stmt (psi);
1041
1042       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1043         continue;
1044
1045       step = determine_biv_step (phi);
1046       if (!step)
1047         continue;
1048
1049       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1050       base = expand_simple_operations (base);
1051       if (contains_abnormal_ssa_name_p (base)
1052           || contains_abnormal_ssa_name_p (step))
1053         continue;
1054
1055       type = TREE_TYPE (PHI_RESULT (phi));
1056       base = fold_convert (type, base);
1057       if (step)
1058         {
1059           if (POINTER_TYPE_P (type))
1060             step = convert_to_ptrofftype (step);
1061           else
1062             step = fold_convert (type, step);
1063         }
1064
1065       set_iv (data, PHI_RESULT (phi), base, step);
1066       found = true;
1067     }
1068
1069   return found;
1070 }
1071
1072 /* Marks basic ivs.  */
1073
1074 static void
1075 mark_bivs (struct ivopts_data *data)
1076 {
1077   gimple phi, def;
1078   tree var;
1079   struct iv *iv, *incr_iv;
1080   struct loop *loop = data->current_loop;
1081   basic_block incr_bb;
1082   gimple_stmt_iterator psi;
1083
1084   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1085     {
1086       phi = gsi_stmt (psi);
1087
1088       iv = get_iv (data, PHI_RESULT (phi));
1089       if (!iv)
1090         continue;
1091
1092       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1093       def = SSA_NAME_DEF_STMT (var);
1094       /* Don't mark iv peeled from other one as biv.  */
1095       if (def
1096           && gimple_code (def) == GIMPLE_PHI
1097           && gimple_bb (def) == loop->header)
1098         continue;
1099
1100       incr_iv = get_iv (data, var);
1101       if (!incr_iv)
1102         continue;
1103
1104       /* If the increment is in the subloop, ignore it.  */
1105       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1106       if (incr_bb->loop_father != data->current_loop
1107           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1108         continue;
1109
1110       iv->biv_p = true;
1111       incr_iv->biv_p = true;
1112     }
1113 }
1114
1115 /* Checks whether STMT defines a linear induction variable and stores its
1116    parameters to IV.  */
1117
1118 static bool
1119 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1120 {
1121   tree lhs;
1122   struct loop *loop = data->current_loop;
1123
1124   iv->base = NULL_TREE;
1125   iv->step = NULL_TREE;
1126
1127   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1128     return false;
1129
1130   lhs = gimple_assign_lhs (stmt);
1131   if (TREE_CODE (lhs) != SSA_NAME)
1132     return false;
1133
1134   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1135     return false;
1136   iv->base = expand_simple_operations (iv->base);
1137
1138   if (contains_abnormal_ssa_name_p (iv->base)
1139       || contains_abnormal_ssa_name_p (iv->step))
1140     return false;
1141
1142   /* If STMT could throw, then do not consider STMT as defining a GIV.
1143      While this will suppress optimizations, we can not safely delete this
1144      GIV and associated statements, even if it appears it is not used.  */
1145   if (stmt_could_throw_p (stmt))
1146     return false;
1147
1148   return true;
1149 }
1150
1151 /* Finds general ivs in statement STMT.  */
1152
1153 static void
1154 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1155 {
1156   affine_iv iv;
1157
1158   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1159     return;
1160
1161   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1162 }
1163
1164 /* Finds general ivs in basic block BB.  */
1165
1166 static void
1167 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1168 {
1169   gimple_stmt_iterator bsi;
1170
1171   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1172     find_givs_in_stmt (data, gsi_stmt (bsi));
1173 }
1174
1175 /* Finds general ivs.  */
1176
1177 static void
1178 find_givs (struct ivopts_data *data)
1179 {
1180   struct loop *loop = data->current_loop;
1181   basic_block *body = get_loop_body_in_dom_order (loop);
1182   unsigned i;
1183
1184   for (i = 0; i < loop->num_nodes; i++)
1185     find_givs_in_bb (data, body[i]);
1186   free (body);
1187 }
1188
1189 /* For each ssa name defined in LOOP determines whether it is an induction
1190    variable and if so, its initial value and step.  */
1191
1192 static bool
1193 find_induction_variables (struct ivopts_data *data)
1194 {
1195   unsigned i;
1196   bitmap_iterator bi;
1197
1198   if (!find_bivs (data))
1199     return false;
1200
1201   find_givs (data);
1202   mark_bivs (data);
1203
1204   if (dump_file && (dump_flags & TDF_DETAILS))
1205     {
1206       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1207
1208       if (niter)
1209         {
1210           fprintf (dump_file, "  number of iterations ");
1211           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1212           if (!integer_zerop (niter->may_be_zero))
1213             {
1214               fprintf (dump_file, "; zero if ");
1215               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1216             }
1217           fprintf (dump_file, "\n\n");
1218         };
1219
1220       fprintf (dump_file, "Induction variables:\n\n");
1221
1222       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1223         {
1224           if (ver_info (data, i)->iv)
1225             dump_iv (dump_file, ver_info (data, i)->iv);
1226         }
1227     }
1228
1229   return true;
1230 }
1231
1232 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1233
1234 static struct iv_use *
1235 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1236             gimple stmt, enum use_type use_type)
1237 {
1238   struct iv_use *use = XCNEW (struct iv_use);
1239
1240   use->id = n_iv_uses (data);
1241   use->type = use_type;
1242   use->iv = iv;
1243   use->stmt = stmt;
1244   use->op_p = use_p;
1245   use->related_cands = BITMAP_ALLOC (NULL);
1246
1247   /* To avoid showing ssa name in the dumps, if it was not reset by the
1248      caller.  */
1249   iv->ssa_name = NULL_TREE;
1250
1251   if (dump_file && (dump_flags & TDF_DETAILS))
1252     dump_use (dump_file, use);
1253
1254   data->iv_uses.safe_push (use);
1255
1256   return use;
1257 }
1258
1259 /* Checks whether OP is a loop-level invariant and if so, records it.
1260    NONLINEAR_USE is true if the invariant is used in a way we do not
1261    handle specially.  */
1262
1263 static void
1264 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1265 {
1266   basic_block bb;
1267   struct version_info *info;
1268
1269   if (TREE_CODE (op) != SSA_NAME
1270       || virtual_operand_p (op))
1271     return;
1272
1273   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1274   if (bb
1275       && flow_bb_inside_loop_p (data->current_loop, bb))
1276     return;
1277
1278   info = name_info (data, op);
1279   info->name = op;
1280   info->has_nonlin_use |= nonlinear_use;
1281   if (!info->inv_id)
1282     info->inv_id = ++data->max_inv_id;
1283   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1284 }
1285
1286 /* Checks whether the use OP is interesting and if so, records it.  */
1287
1288 static struct iv_use *
1289 find_interesting_uses_op (struct ivopts_data *data, tree op)
1290 {
1291   struct iv *iv;
1292   struct iv *civ;
1293   gimple stmt;
1294   struct iv_use *use;
1295
1296   if (TREE_CODE (op) != SSA_NAME)
1297     return NULL;
1298
1299   iv = get_iv (data, op);
1300   if (!iv)
1301     return NULL;
1302
1303   if (iv->have_use_for)
1304     {
1305       use = iv_use (data, iv->use_id);
1306
1307       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1308       return use;
1309     }
1310
1311   if (integer_zerop (iv->step))
1312     {
1313       record_invariant (data, op, true);
1314       return NULL;
1315     }
1316   iv->have_use_for = true;
1317
1318   civ = XNEW (struct iv);
1319   *civ = *iv;
1320
1321   stmt = SSA_NAME_DEF_STMT (op);
1322   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1323               || is_gimple_assign (stmt));
1324
1325   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1326   iv->use_id = use->id;
1327
1328   return use;
1329 }
1330
1331 /* Given a condition in statement STMT, checks whether it is a compare
1332    of an induction variable and an invariant.  If this is the case,
1333    CONTROL_VAR is set to location of the iv, BOUND to the location of
1334    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1335    induction variable descriptions, and true is returned.  If this is not
1336    the case, CONTROL_VAR and BOUND are set to the arguments of the
1337    condition and false is returned.  */
1338
1339 static bool
1340 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1341                        tree **control_var, tree **bound,
1342                        struct iv **iv_var, struct iv **iv_bound)
1343 {
1344   /* The objects returned when COND has constant operands.  */
1345   static struct iv const_iv;
1346   static tree zero;
1347   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1348   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1349   bool ret = false;
1350
1351   if (gimple_code (stmt) == GIMPLE_COND)
1352     {
1353       op0 = gimple_cond_lhs_ptr (stmt);
1354       op1 = gimple_cond_rhs_ptr (stmt);
1355     }
1356   else
1357     {
1358       op0 = gimple_assign_rhs1_ptr (stmt);
1359       op1 = gimple_assign_rhs2_ptr (stmt);
1360     }
1361
1362   zero = integer_zero_node;
1363   const_iv.step = integer_zero_node;
1364
1365   if (TREE_CODE (*op0) == SSA_NAME)
1366     iv0 = get_iv (data, *op0);
1367   if (TREE_CODE (*op1) == SSA_NAME)
1368     iv1 = get_iv (data, *op1);
1369
1370   /* Exactly one of the compared values must be an iv, and the other one must
1371      be an invariant.  */
1372   if (!iv0 || !iv1)
1373     goto end;
1374
1375   if (integer_zerop (iv0->step))
1376     {
1377       /* Control variable may be on the other side.  */
1378       tmp_op = op0; op0 = op1; op1 = tmp_op;
1379       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1380     }
1381   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1382
1383 end:
1384   if (control_var)
1385     *control_var = op0;;
1386   if (iv_var)
1387     *iv_var = iv0;;
1388   if (bound)
1389     *bound = op1;
1390   if (iv_bound)
1391     *iv_bound = iv1;
1392
1393   return ret;
1394 }
1395
1396 /* Checks whether the condition in STMT is interesting and if so,
1397    records it.  */
1398
1399 static void
1400 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1401 {
1402   tree *var_p, *bound_p;
1403   struct iv *var_iv, *civ;
1404
1405   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1406     {
1407       find_interesting_uses_op (data, *var_p);
1408       find_interesting_uses_op (data, *bound_p);
1409       return;
1410     }
1411
1412   civ = XNEW (struct iv);
1413   *civ = *var_iv;
1414   record_use (data, NULL, civ, stmt, USE_COMPARE);
1415 }
1416
1417 /* Returns the outermost loop EXPR is obviously invariant in
1418    relative to the loop LOOP, i.e. if all its operands are defined
1419    outside of the returned loop.  Returns NULL if EXPR is not
1420    even obviously invariant in LOOP.  */
1421
1422 struct loop *
1423 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1424 {
1425   basic_block def_bb;
1426   unsigned i, len;
1427
1428   if (is_gimple_min_invariant (expr))
1429     return current_loops->tree_root;
1430
1431   if (TREE_CODE (expr) == SSA_NAME)
1432     {
1433       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1434       if (def_bb)
1435         {
1436           if (flow_bb_inside_loop_p (loop, def_bb))
1437             return NULL;
1438           return superloop_at_depth (loop,
1439                                      loop_depth (def_bb->loop_father) + 1);
1440         }
1441
1442       return current_loops->tree_root;
1443     }
1444
1445   if (!EXPR_P (expr))
1446     return NULL;
1447
1448   unsigned maxdepth = 0;
1449   len = TREE_OPERAND_LENGTH (expr);
1450   for (i = 0; i < len; i++)
1451     {
1452       struct loop *ivloop;
1453       if (!TREE_OPERAND (expr, i))
1454         continue;
1455
1456       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1457       if (!ivloop)
1458         return NULL;
1459       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1460     }
1461
1462   return superloop_at_depth (loop, maxdepth);
1463 }
1464
1465 /* Returns true if expression EXPR is obviously invariant in LOOP,
1466    i.e. if all its operands are defined outside of the LOOP.  LOOP
1467    should not be the function body.  */
1468
1469 bool
1470 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1471 {
1472   basic_block def_bb;
1473   unsigned i, len;
1474
1475   gcc_assert (loop_depth (loop) > 0);
1476
1477   if (is_gimple_min_invariant (expr))
1478     return true;
1479
1480   if (TREE_CODE (expr) == SSA_NAME)
1481     {
1482       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1483       if (def_bb
1484           && flow_bb_inside_loop_p (loop, def_bb))
1485         return false;
1486
1487       return true;
1488     }
1489
1490   if (!EXPR_P (expr))
1491     return false;
1492
1493   len = TREE_OPERAND_LENGTH (expr);
1494   for (i = 0; i < len; i++)
1495     if (TREE_OPERAND (expr, i)
1496         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1497       return false;
1498
1499   return true;
1500 }
1501
1502 /* Cumulates the steps of indices into DATA and replaces their values with the
1503    initial ones.  Returns false when the value of the index cannot be determined.
1504    Callback for for_each_index.  */
1505
1506 struct ifs_ivopts_data
1507 {
1508   struct ivopts_data *ivopts_data;
1509   gimple stmt;
1510   tree step;
1511 };
1512
1513 static bool
1514 idx_find_step (tree base, tree *idx, void *data)
1515 {
1516   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1517   struct iv *iv;
1518   tree step, iv_base, iv_step, lbound, off;
1519   struct loop *loop = dta->ivopts_data->current_loop;
1520
1521   /* If base is a component ref, require that the offset of the reference
1522      be invariant.  */
1523   if (TREE_CODE (base) == COMPONENT_REF)
1524     {
1525       off = component_ref_field_offset (base);
1526       return expr_invariant_in_loop_p (loop, off);
1527     }
1528
1529   /* If base is array, first check whether we will be able to move the
1530      reference out of the loop (in order to take its address in strength
1531      reduction).  In order for this to work we need both lower bound
1532      and step to be loop invariants.  */
1533   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1534     {
1535       /* Moreover, for a range, the size needs to be invariant as well.  */
1536       if (TREE_CODE (base) == ARRAY_RANGE_REF
1537           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1538         return false;
1539
1540       step = array_ref_element_size (base);
1541       lbound = array_ref_low_bound (base);
1542
1543       if (!expr_invariant_in_loop_p (loop, step)
1544           || !expr_invariant_in_loop_p (loop, lbound))
1545         return false;
1546     }
1547
1548   if (TREE_CODE (*idx) != SSA_NAME)
1549     return true;
1550
1551   iv = get_iv (dta->ivopts_data, *idx);
1552   if (!iv)
1553     return false;
1554
1555   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1556           *&x[0], which is not folded and does not trigger the
1557           ARRAY_REF path below.  */
1558   *idx = iv->base;
1559
1560   if (integer_zerop (iv->step))
1561     return true;
1562
1563   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1564     {
1565       step = array_ref_element_size (base);
1566
1567       /* We only handle addresses whose step is an integer constant.  */
1568       if (TREE_CODE (step) != INTEGER_CST)
1569         return false;
1570     }
1571   else
1572     /* The step for pointer arithmetics already is 1 byte.  */
1573     step = size_one_node;
1574
1575   iv_base = iv->base;
1576   iv_step = iv->step;
1577   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1578                             sizetype, &iv_base, &iv_step, dta->stmt,
1579                             false))
1580     {
1581       /* The index might wrap.  */
1582       return false;
1583     }
1584
1585   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1586   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1587
1588   return true;
1589 }
1590
1591 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1592    object is passed to it in DATA.  */
1593
1594 static bool
1595 idx_record_use (tree base, tree *idx,
1596                 void *vdata)
1597 {
1598   struct ivopts_data *data = (struct ivopts_data *) vdata;
1599   find_interesting_uses_op (data, *idx);
1600   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1601     {
1602       find_interesting_uses_op (data, array_ref_element_size (base));
1603       find_interesting_uses_op (data, array_ref_low_bound (base));
1604     }
1605   return true;
1606 }
1607
1608 /* If we can prove that TOP = cst * BOT for some constant cst,
1609    store cst to MUL and return true.  Otherwise return false.
1610    The returned value is always sign-extended, regardless of the
1611    signedness of TOP and BOT.  */
1612
1613 static bool
1614 constant_multiple_of (tree top, tree bot, double_int *mul)
1615 {
1616   tree mby;
1617   enum tree_code code;
1618   double_int res, p0, p1;
1619   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1620
1621   STRIP_NOPS (top);
1622   STRIP_NOPS (bot);
1623
1624   if (operand_equal_p (top, bot, 0))
1625     {
1626       *mul = double_int_one;
1627       return true;
1628     }
1629
1630   code = TREE_CODE (top);
1631   switch (code)
1632     {
1633     case MULT_EXPR:
1634       mby = TREE_OPERAND (top, 1);
1635       if (TREE_CODE (mby) != INTEGER_CST)
1636         return false;
1637
1638       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1639         return false;
1640
1641       *mul = (res * tree_to_double_int (mby)).sext (precision);
1642       return true;
1643
1644     case PLUS_EXPR:
1645     case MINUS_EXPR:
1646       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1647           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1648         return false;
1649
1650       if (code == MINUS_EXPR)
1651         p1 = -p1;
1652       *mul = (p0 + p1).sext (precision);
1653       return true;
1654
1655     case INTEGER_CST:
1656       if (TREE_CODE (bot) != INTEGER_CST)
1657         return false;
1658
1659       p0 = tree_to_double_int (top).sext (precision);
1660       p1 = tree_to_double_int (bot).sext (precision);
1661       if (p1.is_zero ())
1662         return false;
1663       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1664       return res.is_zero ();
1665
1666     default:
1667       return false;
1668     }
1669 }
1670
1671 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1672
1673 static bool
1674 may_be_unaligned_p (tree ref, tree step)
1675 {
1676   tree base;
1677   tree base_type;
1678   HOST_WIDE_INT bitsize;
1679   HOST_WIDE_INT bitpos;
1680   tree toffset;
1681   enum machine_mode mode;
1682   int unsignedp, volatilep;
1683   unsigned base_align;
1684
1685   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1686      thus they are not misaligned.  */
1687   if (TREE_CODE (ref) == TARGET_MEM_REF)
1688     return false;
1689
1690   /* The test below is basically copy of what expr.c:normal_inner_ref
1691      does to check whether the object must be loaded by parts when
1692      STRICT_ALIGNMENT is true.  */
1693   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1694                               &unsignedp, &volatilep, true);
1695   base_type = TREE_TYPE (base);
1696   base_align = get_object_alignment (base);
1697   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1698
1699   if (mode != BLKmode)
1700     {
1701       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1702
1703       if (base_align < mode_align
1704           || (bitpos % mode_align) != 0
1705           || (bitpos % BITS_PER_UNIT) != 0)
1706         return true;
1707
1708       if (toffset
1709           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1710         return true;
1711
1712       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1713         return true;
1714     }
1715
1716   return false;
1717 }
1718
1719 /* Return true if EXPR may be non-addressable.   */
1720
1721 bool
1722 may_be_nonaddressable_p (tree expr)
1723 {
1724   switch (TREE_CODE (expr))
1725     {
1726     case TARGET_MEM_REF:
1727       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1728          target, thus they are always addressable.  */
1729       return false;
1730
1731     case COMPONENT_REF:
1732       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1733              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1734
1735     case VIEW_CONVERT_EXPR:
1736       /* This kind of view-conversions may wrap non-addressable objects
1737          and make them look addressable.  After some processing the
1738          non-addressability may be uncovered again, causing ADDR_EXPRs
1739          of inappropriate objects to be built.  */
1740       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1741           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1742         return true;
1743
1744       /* ... fall through ... */
1745
1746     case ARRAY_REF:
1747     case ARRAY_RANGE_REF:
1748       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1749
1750     CASE_CONVERT:
1751       return true;
1752
1753     default:
1754       break;
1755     }
1756
1757   return false;
1758 }
1759
1760 /* Finds addresses in *OP_P inside STMT.  */
1761
1762 static void
1763 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1764 {
1765   tree base = *op_p, step = size_zero_node;
1766   struct iv *civ;
1767   struct ifs_ivopts_data ifs_ivopts_data;
1768
1769   /* Do not play with volatile memory references.  A bit too conservative,
1770      perhaps, but safe.  */
1771   if (gimple_has_volatile_ops (stmt))
1772     goto fail;
1773
1774   /* Ignore bitfields for now.  Not really something terribly complicated
1775      to handle.  TODO.  */
1776   if (TREE_CODE (base) == BIT_FIELD_REF)
1777     goto fail;
1778
1779   base = unshare_expr (base);
1780
1781   if (TREE_CODE (base) == TARGET_MEM_REF)
1782     {
1783       tree type = build_pointer_type (TREE_TYPE (base));
1784       tree astep;
1785
1786       if (TMR_BASE (base)
1787           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1788         {
1789           civ = get_iv (data, TMR_BASE (base));
1790           if (!civ)
1791             goto fail;
1792
1793           TMR_BASE (base) = civ->base;
1794           step = civ->step;
1795         }
1796       if (TMR_INDEX2 (base)
1797           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1798         {
1799           civ = get_iv (data, TMR_INDEX2 (base));
1800           if (!civ)
1801             goto fail;
1802
1803           TMR_INDEX2 (base) = civ->base;
1804           step = civ->step;
1805         }
1806       if (TMR_INDEX (base)
1807           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1808         {
1809           civ = get_iv (data, TMR_INDEX (base));
1810           if (!civ)
1811             goto fail;
1812
1813           TMR_INDEX (base) = civ->base;
1814           astep = civ->step;
1815
1816           if (astep)
1817             {
1818               if (TMR_STEP (base))
1819                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1820
1821               step = fold_build2 (PLUS_EXPR, type, step, astep);
1822             }
1823         }
1824
1825       if (integer_zerop (step))
1826         goto fail;
1827       base = tree_mem_ref_addr (type, base);
1828     }
1829   else
1830     {
1831       ifs_ivopts_data.ivopts_data = data;
1832       ifs_ivopts_data.stmt = stmt;
1833       ifs_ivopts_data.step = size_zero_node;
1834       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1835           || integer_zerop (ifs_ivopts_data.step))
1836         goto fail;
1837       step = ifs_ivopts_data.step;
1838
1839       /* Check that the base expression is addressable.  This needs
1840          to be done after substituting bases of IVs into it.  */
1841       if (may_be_nonaddressable_p (base))
1842         goto fail;
1843
1844       /* Moreover, on strict alignment platforms, check that it is
1845          sufficiently aligned.  */
1846       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1847         goto fail;
1848
1849       base = build_fold_addr_expr (base);
1850
1851       /* Substituting bases of IVs into the base expression might
1852          have caused folding opportunities.  */
1853       if (TREE_CODE (base) == ADDR_EXPR)
1854         {
1855           tree *ref = &TREE_OPERAND (base, 0);
1856           while (handled_component_p (*ref))
1857             ref = &TREE_OPERAND (*ref, 0);
1858           if (TREE_CODE (*ref) == MEM_REF)
1859             {
1860               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1861                                       TREE_OPERAND (*ref, 0),
1862                                       TREE_OPERAND (*ref, 1));
1863               if (tem)
1864                 *ref = tem;
1865             }
1866         }
1867     }
1868
1869   civ = alloc_iv (base, step);
1870   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1871   return;
1872
1873 fail:
1874   for_each_index (op_p, idx_record_use, data);
1875 }
1876
1877 /* Finds and records invariants used in STMT.  */
1878
1879 static void
1880 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1881 {
1882   ssa_op_iter iter;
1883   use_operand_p use_p;
1884   tree op;
1885
1886   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1887     {
1888       op = USE_FROM_PTR (use_p);
1889       record_invariant (data, op, false);
1890     }
1891 }
1892
1893 /* Finds interesting uses of induction variables in the statement STMT.  */
1894
1895 static void
1896 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1897 {
1898   struct iv *iv;
1899   tree op, *lhs, *rhs;
1900   ssa_op_iter iter;
1901   use_operand_p use_p;
1902   enum tree_code code;
1903
1904   find_invariants_stmt (data, stmt);
1905
1906   if (gimple_code (stmt) == GIMPLE_COND)
1907     {
1908       find_interesting_uses_cond (data, stmt);
1909       return;
1910     }
1911
1912   if (is_gimple_assign (stmt))
1913     {
1914       lhs = gimple_assign_lhs_ptr (stmt);
1915       rhs = gimple_assign_rhs1_ptr (stmt);
1916
1917       if (TREE_CODE (*lhs) == SSA_NAME)
1918         {
1919           /* If the statement defines an induction variable, the uses are not
1920              interesting by themselves.  */
1921
1922           iv = get_iv (data, *lhs);
1923
1924           if (iv && !integer_zerop (iv->step))
1925             return;
1926         }
1927
1928       code = gimple_assign_rhs_code (stmt);
1929       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1930           && (REFERENCE_CLASS_P (*rhs)
1931               || is_gimple_val (*rhs)))
1932         {
1933           if (REFERENCE_CLASS_P (*rhs))
1934             find_interesting_uses_address (data, stmt, rhs);
1935           else
1936             find_interesting_uses_op (data, *rhs);
1937
1938           if (REFERENCE_CLASS_P (*lhs))
1939             find_interesting_uses_address (data, stmt, lhs);
1940           return;
1941         }
1942       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1943         {
1944           find_interesting_uses_cond (data, stmt);
1945           return;
1946         }
1947
1948       /* TODO -- we should also handle address uses of type
1949
1950          memory = call (whatever);
1951
1952          and
1953
1954          call (memory).  */
1955     }
1956
1957   if (gimple_code (stmt) == GIMPLE_PHI
1958       && gimple_bb (stmt) == data->current_loop->header)
1959     {
1960       iv = get_iv (data, PHI_RESULT (stmt));
1961
1962       if (iv && !integer_zerop (iv->step))
1963         return;
1964     }
1965
1966   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1967     {
1968       op = USE_FROM_PTR (use_p);
1969
1970       if (TREE_CODE (op) != SSA_NAME)
1971         continue;
1972
1973       iv = get_iv (data, op);
1974       if (!iv)
1975         continue;
1976
1977       find_interesting_uses_op (data, op);
1978     }
1979 }
1980
1981 /* Finds interesting uses of induction variables outside of loops
1982    on loop exit edge EXIT.  */
1983
1984 static void
1985 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1986 {
1987   gimple phi;
1988   gimple_stmt_iterator psi;
1989   tree def;
1990
1991   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1992     {
1993       phi = gsi_stmt (psi);
1994       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1995       if (!virtual_operand_p (def))
1996         find_interesting_uses_op (data, def);
1997     }
1998 }
1999
2000 /* Finds uses of the induction variables that are interesting.  */
2001
2002 static void
2003 find_interesting_uses (struct ivopts_data *data)
2004 {
2005   basic_block bb;
2006   gimple_stmt_iterator bsi;
2007   basic_block *body = get_loop_body (data->current_loop);
2008   unsigned i;
2009   struct version_info *info;
2010   edge e;
2011
2012   if (dump_file && (dump_flags & TDF_DETAILS))
2013     fprintf (dump_file, "Uses:\n\n");
2014
2015   for (i = 0; i < data->current_loop->num_nodes; i++)
2016     {
2017       edge_iterator ei;
2018       bb = body[i];
2019
2020       FOR_EACH_EDGE (e, ei, bb->succs)
2021         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2022             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2023           find_interesting_uses_outside (data, e);
2024
2025       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2026         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2027       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2028         if (!is_gimple_debug (gsi_stmt (bsi)))
2029           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2030     }
2031
2032   if (dump_file && (dump_flags & TDF_DETAILS))
2033     {
2034       bitmap_iterator bi;
2035
2036       fprintf (dump_file, "\n");
2037
2038       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2039         {
2040           info = ver_info (data, i);
2041           if (info->inv_id)
2042             {
2043               fprintf (dump_file, "  ");
2044               print_generic_expr (dump_file, info->name, TDF_SLIM);
2045               fprintf (dump_file, " is invariant (%d)%s\n",
2046                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2047             }
2048         }
2049
2050       fprintf (dump_file, "\n");
2051     }
2052
2053   free (body);
2054 }
2055
2056 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2057    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2058    we are at the top-level of the processed address.  */
2059
2060 static tree
2061 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2062                 HOST_WIDE_INT *offset)
2063 {
2064   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2065   enum tree_code code;
2066   tree type, orig_type = TREE_TYPE (expr);
2067   HOST_WIDE_INT off0, off1, st;
2068   tree orig_expr = expr;
2069
2070   STRIP_NOPS (expr);
2071
2072   type = TREE_TYPE (expr);
2073   code = TREE_CODE (expr);
2074   *offset = 0;
2075
2076   switch (code)
2077     {
2078     case INTEGER_CST:
2079       if (!cst_and_fits_in_hwi (expr)
2080           || integer_zerop (expr))
2081         return orig_expr;
2082
2083       *offset = int_cst_value (expr);
2084       return build_int_cst (orig_type, 0);
2085
2086     case POINTER_PLUS_EXPR:
2087     case PLUS_EXPR:
2088     case MINUS_EXPR:
2089       op0 = TREE_OPERAND (expr, 0);
2090       op1 = TREE_OPERAND (expr, 1);
2091
2092       op0 = strip_offset_1 (op0, false, false, &off0);
2093       op1 = strip_offset_1 (op1, false, false, &off1);
2094
2095       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2096       if (op0 == TREE_OPERAND (expr, 0)
2097           && op1 == TREE_OPERAND (expr, 1))
2098         return orig_expr;
2099
2100       if (integer_zerop (op1))
2101         expr = op0;
2102       else if (integer_zerop (op0))
2103         {
2104           if (code == MINUS_EXPR)
2105             expr = fold_build1 (NEGATE_EXPR, type, op1);
2106           else
2107             expr = op1;
2108         }
2109       else
2110         expr = fold_build2 (code, type, op0, op1);
2111
2112       return fold_convert (orig_type, expr);
2113
2114     case MULT_EXPR:
2115       op1 = TREE_OPERAND (expr, 1);
2116       if (!cst_and_fits_in_hwi (op1))
2117         return orig_expr;
2118
2119       op0 = TREE_OPERAND (expr, 0);
2120       op0 = strip_offset_1 (op0, false, false, &off0);
2121       if (op0 == TREE_OPERAND (expr, 0))
2122         return orig_expr;
2123
2124       *offset = off0 * int_cst_value (op1);
2125       if (integer_zerop (op0))
2126         expr = op0;
2127       else
2128         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2129
2130       return fold_convert (orig_type, expr);
2131
2132     case ARRAY_REF:
2133     case ARRAY_RANGE_REF:
2134       if (!inside_addr)
2135         return orig_expr;
2136
2137       step = array_ref_element_size (expr);
2138       if (!cst_and_fits_in_hwi (step))
2139         break;
2140
2141       st = int_cst_value (step);
2142       op1 = TREE_OPERAND (expr, 1);
2143       op1 = strip_offset_1 (op1, false, false, &off1);
2144       *offset = off1 * st;
2145
2146       if (top_compref
2147           && integer_zerop (op1))
2148         {
2149           /* Strip the component reference completely.  */
2150           op0 = TREE_OPERAND (expr, 0);
2151           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2152           *offset += off0;
2153           return op0;
2154         }
2155       break;
2156
2157     case COMPONENT_REF:
2158       {
2159         tree field;
2160
2161         if (!inside_addr)
2162           return orig_expr;
2163
2164         tmp = component_ref_field_offset (expr);
2165         field = TREE_OPERAND (expr, 1);
2166         if (top_compref
2167             && cst_and_fits_in_hwi (tmp)
2168             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2169           {
2170             HOST_WIDE_INT boffset, abs_off;
2171
2172             /* Strip the component reference completely.  */
2173             op0 = TREE_OPERAND (expr, 0);
2174             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2175             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2176             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2177             if (boffset < 0)
2178               abs_off = -abs_off;
2179
2180             *offset = off0 + int_cst_value (tmp) + abs_off;
2181             return op0;
2182           }
2183       }
2184       break;
2185
2186     case ADDR_EXPR:
2187       op0 = TREE_OPERAND (expr, 0);
2188       op0 = strip_offset_1 (op0, true, true, &off0);
2189       *offset += off0;
2190
2191       if (op0 == TREE_OPERAND (expr, 0))
2192         return orig_expr;
2193
2194       expr = build_fold_addr_expr (op0);
2195       return fold_convert (orig_type, expr);
2196
2197     case MEM_REF:
2198       /* ???  Offset operand?  */
2199       inside_addr = false;
2200       break;
2201
2202     default:
2203       return orig_expr;
2204     }
2205
2206   /* Default handling of expressions for that we want to recurse into
2207      the first operand.  */
2208   op0 = TREE_OPERAND (expr, 0);
2209   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2210   *offset += off0;
2211
2212   if (op0 == TREE_OPERAND (expr, 0)
2213       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2214     return orig_expr;
2215
2216   expr = copy_node (expr);
2217   TREE_OPERAND (expr, 0) = op0;
2218   if (op1)
2219     TREE_OPERAND (expr, 1) = op1;
2220
2221   /* Inside address, we might strip the top level component references,
2222      thus changing type of the expression.  Handling of ADDR_EXPR
2223      will fix that.  */
2224   expr = fold_convert (orig_type, expr);
2225
2226   return expr;
2227 }
2228
2229 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2230
2231 static tree
2232 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2233 {
2234   HOST_WIDE_INT off;
2235   tree core = strip_offset_1 (expr, false, false, &off);
2236   *offset = off;
2237   return core;
2238 }
2239
2240 /* Returns variant of TYPE that can be used as base for different uses.
2241    We return unsigned type with the same precision, which avoids problems
2242    with overflows.  */
2243
2244 static tree
2245 generic_type_for (tree type)
2246 {
2247   if (POINTER_TYPE_P (type))
2248     return unsigned_type_for (type);
2249
2250   if (TYPE_UNSIGNED (type))
2251     return type;
2252
2253   return unsigned_type_for (type);
2254 }
2255
2256 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2257    the bitmap to that we should store it.  */
2258
2259 static struct ivopts_data *fd_ivopts_data;
2260 static tree
2261 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2262 {
2263   bitmap *depends_on = (bitmap *) data;
2264   struct version_info *info;
2265
2266   if (TREE_CODE (*expr_p) != SSA_NAME)
2267     return NULL_TREE;
2268   info = name_info (fd_ivopts_data, *expr_p);
2269
2270   if (!info->inv_id || info->has_nonlin_use)
2271     return NULL_TREE;
2272
2273   if (!*depends_on)
2274     *depends_on = BITMAP_ALLOC (NULL);
2275   bitmap_set_bit (*depends_on, info->inv_id);
2276
2277   return NULL_TREE;
2278 }
2279
2280 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2281    position to POS.  If USE is not NULL, the candidate is set as related to
2282    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2283    replacement of the final value of the iv by a direct computation.  */
2284
2285 static struct iv_cand *
2286 add_candidate_1 (struct ivopts_data *data,
2287                  tree base, tree step, bool important, enum iv_position pos,
2288                  struct iv_use *use, gimple incremented_at)
2289 {
2290   unsigned i;
2291   struct iv_cand *cand = NULL;
2292   tree type, orig_type;
2293
2294   /* For non-original variables, make sure their values are computed in a type
2295      that does not invoke undefined behavior on overflows (since in general,
2296      we cannot prove that these induction variables are non-wrapping).  */
2297   if (pos != IP_ORIGINAL)
2298     {
2299       orig_type = TREE_TYPE (base);
2300       type = generic_type_for (orig_type);
2301       if (type != orig_type)
2302         {
2303           base = fold_convert (type, base);
2304           step = fold_convert (type, step);
2305         }
2306     }
2307
2308   for (i = 0; i < n_iv_cands (data); i++)
2309     {
2310       cand = iv_cand (data, i);
2311
2312       if (cand->pos != pos)
2313         continue;
2314
2315       if (cand->incremented_at != incremented_at
2316           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2317               && cand->ainc_use != use))
2318         continue;
2319
2320       if (!cand->iv)
2321         {
2322           if (!base && !step)
2323             break;
2324
2325           continue;
2326         }
2327
2328       if (!base && !step)
2329         continue;
2330
2331       if (operand_equal_p (base, cand->iv->base, 0)
2332           && operand_equal_p (step, cand->iv->step, 0)
2333           && (TYPE_PRECISION (TREE_TYPE (base))
2334               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2335         break;
2336     }
2337
2338   if (i == n_iv_cands (data))
2339     {
2340       cand = XCNEW (struct iv_cand);
2341       cand->id = i;
2342
2343       if (!base && !step)
2344         cand->iv = NULL;
2345       else
2346         cand->iv = alloc_iv (base, step);
2347
2348       cand->pos = pos;
2349       if (pos != IP_ORIGINAL && cand->iv)
2350         {
2351           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2352           cand->var_after = cand->var_before;
2353         }
2354       cand->important = important;
2355       cand->incremented_at = incremented_at;
2356       data->iv_candidates.safe_push (cand);
2357
2358       if (step
2359           && TREE_CODE (step) != INTEGER_CST)
2360         {
2361           fd_ivopts_data = data;
2362           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2363         }
2364
2365       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2366         cand->ainc_use = use;
2367       else
2368         cand->ainc_use = NULL;
2369
2370       if (dump_file && (dump_flags & TDF_DETAILS))
2371         dump_cand (dump_file, cand);
2372     }
2373
2374   if (important && !cand->important)
2375     {
2376       cand->important = true;
2377       if (dump_file && (dump_flags & TDF_DETAILS))
2378         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2379     }
2380
2381   if (use)
2382     {
2383       bitmap_set_bit (use->related_cands, i);
2384       if (dump_file && (dump_flags & TDF_DETAILS))
2385         fprintf (dump_file, "Candidate %d is related to use %d\n",
2386                  cand->id, use->id);
2387     }
2388
2389   return cand;
2390 }
2391
2392 /* Returns true if incrementing the induction variable at the end of the LOOP
2393    is allowed.
2394
2395    The purpose is to avoid splitting latch edge with a biv increment, thus
2396    creating a jump, possibly confusing other optimization passes and leaving
2397    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2398    is not available (so we do not have a better alternative), or if the latch
2399    edge is already nonempty.  */
2400
2401 static bool
2402 allow_ip_end_pos_p (struct loop *loop)
2403 {
2404   if (!ip_normal_pos (loop))
2405     return true;
2406
2407   if (!empty_block_p (ip_end_pos (loop)))
2408     return true;
2409
2410   return false;
2411 }
2412
2413 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2414    Important field is set to IMPORTANT.  */
2415
2416 static void
2417 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2418                         bool important, struct iv_use *use)
2419 {
2420   basic_block use_bb = gimple_bb (use->stmt);
2421   enum machine_mode mem_mode;
2422   unsigned HOST_WIDE_INT cstepi;
2423
2424   /* If we insert the increment in any position other than the standard
2425      ones, we must ensure that it is incremented once per iteration.
2426      It must not be in an inner nested loop, or one side of an if
2427      statement.  */
2428   if (use_bb->loop_father != data->current_loop
2429       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2430       || stmt_could_throw_p (use->stmt)
2431       || !cst_and_fits_in_hwi (step))
2432     return;
2433
2434   cstepi = int_cst_value (step);
2435
2436   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2437   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2438         || USE_STORE_PRE_INCREMENT (mem_mode))
2439        && GET_MODE_SIZE (mem_mode) == cstepi)
2440       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2441            || USE_STORE_PRE_DECREMENT (mem_mode))
2442           && GET_MODE_SIZE (mem_mode) == -cstepi))
2443     {
2444       enum tree_code code = MINUS_EXPR;
2445       tree new_base;
2446       tree new_step = step;
2447
2448       if (POINTER_TYPE_P (TREE_TYPE (base)))
2449         {
2450           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2451           code = POINTER_PLUS_EXPR;
2452         }
2453       else
2454         new_step = fold_convert (TREE_TYPE (base), new_step);
2455       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2456       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2457                        use->stmt);
2458     }
2459   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2460         || USE_STORE_POST_INCREMENT (mem_mode))
2461        && GET_MODE_SIZE (mem_mode) == cstepi)
2462       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2463            || USE_STORE_POST_DECREMENT (mem_mode))
2464           && GET_MODE_SIZE (mem_mode) == -cstepi))
2465     {
2466       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2467                        use->stmt);
2468     }
2469 }
2470
2471 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2472    position to POS.  If USE is not NULL, the candidate is set as related to
2473    it.  The candidate computation is scheduled on all available positions.  */
2474
2475 static void
2476 add_candidate (struct ivopts_data *data,
2477                tree base, tree step, bool important, struct iv_use *use)
2478 {
2479   if (ip_normal_pos (data->current_loop))
2480     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2481   if (ip_end_pos (data->current_loop)
2482       && allow_ip_end_pos_p (data->current_loop))
2483     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2484
2485   if (use != NULL && use->type == USE_ADDRESS)
2486     add_autoinc_candidates (data, base, step, important, use);
2487 }
2488
2489 /* Adds standard iv candidates.  */
2490
2491 static void
2492 add_standard_iv_candidates (struct ivopts_data *data)
2493 {
2494   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2495
2496   /* The same for a double-integer type if it is still fast enough.  */
2497   if (TYPE_PRECISION
2498         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2499       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2500     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2501                    build_int_cst (long_integer_type_node, 1), true, NULL);
2502
2503   /* The same for a double-integer type if it is still fast enough.  */
2504   if (TYPE_PRECISION
2505         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2506       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2507     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2508                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2509 }
2510
2511
2512 /* Adds candidates bases on the old induction variable IV.  */
2513
2514 static void
2515 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2516 {
2517   gimple phi;
2518   tree def;
2519   struct iv_cand *cand;
2520
2521   add_candidate (data, iv->base, iv->step, true, NULL);
2522
2523   /* The same, but with initial value zero.  */
2524   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2525     add_candidate (data, size_int (0), iv->step, true, NULL);
2526   else
2527     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2528                    iv->step, true, NULL);
2529
2530   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2531   if (gimple_code (phi) == GIMPLE_PHI)
2532     {
2533       /* Additionally record the possibility of leaving the original iv
2534          untouched.  */
2535       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2536       /* Don't add candidate if it's from another PHI node because
2537          it's an affine iv appearing in the form of PEELED_CHREC.  */
2538       phi = SSA_NAME_DEF_STMT (def);
2539       if (gimple_code (phi) != GIMPLE_PHI)
2540         {
2541           cand = add_candidate_1 (data,
2542                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2543                                   SSA_NAME_DEF_STMT (def));
2544           cand->var_before = iv->ssa_name;
2545           cand->var_after = def;
2546         }
2547       else
2548         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2549     }
2550 }
2551
2552 /* Adds candidates based on the old induction variables.  */
2553
2554 static void
2555 add_old_ivs_candidates (struct ivopts_data *data)
2556 {
2557   unsigned i;
2558   struct iv *iv;
2559   bitmap_iterator bi;
2560
2561   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2562     {
2563       iv = ver_info (data, i)->iv;
2564       if (iv && iv->biv_p && !integer_zerop (iv->step))
2565         add_old_iv_candidates (data, iv);
2566     }
2567 }
2568
2569 /* Adds candidates based on the value of the induction variable IV and USE.  */
2570
2571 static void
2572 add_iv_value_candidates (struct ivopts_data *data,
2573                          struct iv *iv, struct iv_use *use)
2574 {
2575   unsigned HOST_WIDE_INT offset;
2576   tree base;
2577   tree basetype;
2578
2579   add_candidate (data, iv->base, iv->step, false, use);
2580
2581   /* The same, but with initial value zero.  Make such variable important,
2582      since it is generic enough so that possibly many uses may be based
2583      on it.  */
2584   basetype = TREE_TYPE (iv->base);
2585   if (POINTER_TYPE_P (basetype))
2586     basetype = sizetype;
2587   add_candidate (data, build_int_cst (basetype, 0),
2588                  iv->step, true, use);
2589
2590   /* Third, try removing the constant offset.  Make sure to even
2591      add a candidate for &a[0] vs. (T *)&a.  */
2592   base = strip_offset (iv->base, &offset);
2593   if (offset
2594       || base != iv->base)
2595     add_candidate (data, base, iv->step, false, use);
2596 }
2597
2598 /* Adds candidates based on the uses.  */
2599
2600 static void
2601 add_derived_ivs_candidates (struct ivopts_data *data)
2602 {
2603   unsigned i;
2604
2605   for (i = 0; i < n_iv_uses (data); i++)
2606     {
2607       struct iv_use *use = iv_use (data, i);
2608
2609       if (!use)
2610         continue;
2611
2612       switch (use->type)
2613         {
2614         case USE_NONLINEAR_EXPR:
2615         case USE_COMPARE:
2616         case USE_ADDRESS:
2617           /* Just add the ivs based on the value of the iv used here.  */
2618           add_iv_value_candidates (data, use->iv, use);
2619           break;
2620
2621         default:
2622           gcc_unreachable ();
2623         }
2624     }
2625 }
2626
2627 /* Record important candidates and add them to related_cands bitmaps
2628    if needed.  */
2629
2630 static void
2631 record_important_candidates (struct ivopts_data *data)
2632 {
2633   unsigned i;
2634   struct iv_use *use;
2635
2636   for (i = 0; i < n_iv_cands (data); i++)
2637     {
2638       struct iv_cand *cand = iv_cand (data, i);
2639
2640       if (cand->important)
2641         bitmap_set_bit (data->important_candidates, i);
2642     }
2643
2644   data->consider_all_candidates = (n_iv_cands (data)
2645                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2646
2647   if (data->consider_all_candidates)
2648     {
2649       /* We will not need "related_cands" bitmaps in this case,
2650          so release them to decrease peak memory consumption.  */
2651       for (i = 0; i < n_iv_uses (data); i++)
2652         {
2653           use = iv_use (data, i);
2654           BITMAP_FREE (use->related_cands);
2655         }
2656     }
2657   else
2658     {
2659       /* Add important candidates to the related_cands bitmaps.  */
2660       for (i = 0; i < n_iv_uses (data); i++)
2661         bitmap_ior_into (iv_use (data, i)->related_cands,
2662                          data->important_candidates);
2663     }
2664 }
2665
2666 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2667    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2668    we allocate a simple list to every use.  */
2669
2670 static void
2671 alloc_use_cost_map (struct ivopts_data *data)
2672 {
2673   unsigned i, size, s;
2674
2675   for (i = 0; i < n_iv_uses (data); i++)
2676     {
2677       struct iv_use *use = iv_use (data, i);
2678
2679       if (data->consider_all_candidates)
2680         size = n_iv_cands (data);
2681       else
2682         {
2683           s = bitmap_count_bits (use->related_cands);
2684
2685           /* Round up to the power of two, so that moduling by it is fast.  */
2686           size = s ? (1 << ceil_log2 (s)) : 1;
2687         }
2688
2689       use->n_map_members = size;
2690       use->cost_map = XCNEWVEC (struct cost_pair, size);
2691     }
2692 }
2693
2694 /* Returns description of computation cost of expression whose runtime
2695    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2696
2697 static comp_cost
2698 new_cost (unsigned runtime, unsigned complexity)
2699 {
2700   comp_cost cost;
2701
2702   cost.cost = runtime;
2703   cost.complexity = complexity;
2704
2705   return cost;
2706 }
2707
2708 /* Adds costs COST1 and COST2.  */
2709
2710 static comp_cost
2711 add_costs (comp_cost cost1, comp_cost cost2)
2712 {
2713   cost1.cost += cost2.cost;
2714   cost1.complexity += cost2.complexity;
2715
2716   return cost1;
2717 }
2718 /* Subtracts costs COST1 and COST2.  */
2719
2720 static comp_cost
2721 sub_costs (comp_cost cost1, comp_cost cost2)
2722 {
2723   cost1.cost -= cost2.cost;
2724   cost1.complexity -= cost2.complexity;
2725
2726   return cost1;
2727 }
2728
2729 /* Returns a negative number if COST1 < COST2, a positive number if
2730    COST1 > COST2, and 0 if COST1 = COST2.  */
2731
2732 static int
2733 compare_costs (comp_cost cost1, comp_cost cost2)
2734 {
2735   if (cost1.cost == cost2.cost)
2736     return cost1.complexity - cost2.complexity;
2737
2738   return cost1.cost - cost2.cost;
2739 }
2740
2741 /* Returns true if COST is infinite.  */
2742
2743 static bool
2744 infinite_cost_p (comp_cost cost)
2745 {
2746   return cost.cost == INFTY;
2747 }
2748
2749 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2750    on invariants DEPENDS_ON and that the value used in expressing it
2751    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2752
2753 static void
2754 set_use_iv_cost (struct ivopts_data *data,
2755                  struct iv_use *use, struct iv_cand *cand,
2756                  comp_cost cost, bitmap depends_on, tree value,
2757                  enum tree_code comp, int inv_expr_id)
2758 {
2759   unsigned i, s;
2760
2761   if (infinite_cost_p (cost))
2762     {
2763       BITMAP_FREE (depends_on);
2764       return;
2765     }
2766
2767   if (data->consider_all_candidates)
2768     {
2769       use->cost_map[cand->id].cand = cand;
2770       use->cost_map[cand->id].cost = cost;
2771       use->cost_map[cand->id].depends_on = depends_on;
2772       use->cost_map[cand->id].value = value;
2773       use->cost_map[cand->id].comp = comp;
2774       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2775       return;
2776     }
2777
2778   /* n_map_members is a power of two, so this computes modulo.  */
2779   s = cand->id & (use->n_map_members - 1);
2780   for (i = s; i < use->n_map_members; i++)
2781     if (!use->cost_map[i].cand)
2782       goto found;
2783   for (i = 0; i < s; i++)
2784     if (!use->cost_map[i].cand)
2785       goto found;
2786
2787   gcc_unreachable ();
2788
2789 found:
2790   use->cost_map[i].cand = cand;
2791   use->cost_map[i].cost = cost;
2792   use->cost_map[i].depends_on = depends_on;
2793   use->cost_map[i].value = value;
2794   use->cost_map[i].comp = comp;
2795   use->cost_map[i].inv_expr_id = inv_expr_id;
2796 }
2797
2798 /* Gets cost of (USE, CANDIDATE) pair.  */
2799
2800 static struct cost_pair *
2801 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2802                  struct iv_cand *cand)
2803 {
2804   unsigned i, s;
2805   struct cost_pair *ret;
2806
2807   if (!cand)
2808     return NULL;
2809
2810   if (data->consider_all_candidates)
2811     {
2812       ret = use->cost_map + cand->id;
2813       if (!ret->cand)
2814         return NULL;
2815
2816       return ret;
2817     }
2818
2819   /* n_map_members is a power of two, so this computes modulo.  */
2820   s = cand->id & (use->n_map_members - 1);
2821   for (i = s; i < use->n_map_members; i++)
2822     if (use->cost_map[i].cand == cand)
2823       return use->cost_map + i;
2824     else if (use->cost_map[i].cand == NULL)
2825       return NULL;
2826   for (i = 0; i < s; i++)
2827     if (use->cost_map[i].cand == cand)
2828       return use->cost_map + i;
2829     else if (use->cost_map[i].cand == NULL)
2830       return NULL;
2831
2832   return NULL;
2833 }
2834
2835 /* Returns estimate on cost of computing SEQ.  */
2836
2837 static unsigned
2838 seq_cost (rtx seq, bool speed)
2839 {
2840   unsigned cost = 0;
2841   rtx set;
2842
2843   for (; seq; seq = NEXT_INSN (seq))
2844     {
2845       set = single_set (seq);
2846       if (set)
2847         cost += set_src_cost (SET_SRC (set), speed);
2848       else
2849         cost++;
2850     }
2851
2852   return cost;
2853 }
2854
2855 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2856 static rtx
2857 produce_memory_decl_rtl (tree obj, int *regno)
2858 {
2859   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2860   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2861   rtx x;
2862
2863   gcc_assert (obj);
2864   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2865     {
2866       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2867       x = gen_rtx_SYMBOL_REF (address_mode, name);
2868       SET_SYMBOL_REF_DECL (x, obj);
2869       x = gen_rtx_MEM (DECL_MODE (obj), x);
2870       set_mem_addr_space (x, as);
2871       targetm.encode_section_info (obj, x, true);
2872     }
2873   else
2874     {
2875       x = gen_raw_REG (address_mode, (*regno)++);
2876       x = gen_rtx_MEM (DECL_MODE (obj), x);
2877       set_mem_addr_space (x, as);
2878     }
2879
2880   return x;
2881 }
2882
2883 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2884    walk_tree.  DATA contains the actual fake register number.  */
2885
2886 static tree
2887 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2888 {
2889   tree obj = NULL_TREE;
2890   rtx x = NULL_RTX;
2891   int *regno = (int *) data;
2892
2893   switch (TREE_CODE (*expr_p))
2894     {
2895     case ADDR_EXPR:
2896       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2897            handled_component_p (*expr_p);
2898            expr_p = &TREE_OPERAND (*expr_p, 0))
2899         continue;
2900       obj = *expr_p;
2901       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2902         x = produce_memory_decl_rtl (obj, regno);
2903       break;
2904
2905     case SSA_NAME:
2906       *ws = 0;
2907       obj = SSA_NAME_VAR (*expr_p);
2908       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2909       if (!obj)
2910         return NULL_TREE;
2911       if (!DECL_RTL_SET_P (obj))
2912         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2913       break;
2914
2915     case VAR_DECL:
2916     case PARM_DECL:
2917     case RESULT_DECL:
2918       *ws = 0;
2919       obj = *expr_p;
2920
2921       if (DECL_RTL_SET_P (obj))
2922         break;
2923
2924       if (DECL_MODE (obj) == BLKmode)
2925         x = produce_memory_decl_rtl (obj, regno);
2926       else
2927         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2928
2929       break;
2930
2931     default:
2932       break;
2933     }
2934
2935   if (x)
2936     {
2937       decl_rtl_to_reset.safe_push (obj);
2938       SET_DECL_RTL (obj, x);
2939     }
2940
2941   return NULL_TREE;
2942 }
2943
2944 /* Determines cost of the computation of EXPR.  */
2945
2946 static unsigned
2947 computation_cost (tree expr, bool speed)
2948 {
2949   rtx seq, rslt;
2950   tree type = TREE_TYPE (expr);
2951   unsigned cost;
2952   /* Avoid using hard regs in ways which may be unsupported.  */
2953   int regno = LAST_VIRTUAL_REGISTER + 1;
2954   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2955   enum node_frequency real_frequency = node->frequency;
2956
2957   node->frequency = NODE_FREQUENCY_NORMAL;
2958   crtl->maybe_hot_insn_p = speed;
2959   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2960   start_sequence ();
2961   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2962   seq = get_insns ();
2963   end_sequence ();
2964   default_rtl_profile ();
2965   node->frequency = real_frequency;
2966
2967   cost = seq_cost (seq, speed);
2968   if (MEM_P (rslt))
2969     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2970                           TYPE_ADDR_SPACE (type), speed);
2971   else if (!REG_P (rslt))
2972     cost += set_src_cost (rslt, speed);
2973
2974   return cost;
2975 }
2976
2977 /* Returns variable containing the value of candidate CAND at statement AT.  */
2978
2979 static tree
2980 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2981 {
2982   if (stmt_after_increment (loop, cand, stmt))
2983     return cand->var_after;
2984   else
2985     return cand->var_before;
2986 }
2987
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989    same precision that is at least as wide as the precision of TYPE, stores
2990    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2991    type of A and B.  */
2992
2993 static tree
2994 determine_common_wider_type (tree *a, tree *b)
2995 {
2996   tree wider_type = NULL;
2997   tree suba, subb;
2998   tree atype = TREE_TYPE (*a);
2999
3000   if (CONVERT_EXPR_P (*a))
3001     {
3002       suba = TREE_OPERAND (*a, 0);
3003       wider_type = TREE_TYPE (suba);
3004       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3005         return atype;
3006     }
3007   else
3008     return atype;
3009
3010   if (CONVERT_EXPR_P (*b))
3011     {
3012       subb = TREE_OPERAND (*b, 0);
3013       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3014         return atype;
3015     }
3016   else
3017     return atype;
3018
3019   *a = suba;
3020   *b = subb;
3021   return wider_type;
3022 }
3023
3024 /* Determines the expression by that USE is expressed from induction variable
3025    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3026    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3027
3028 static bool
3029 get_computation_aff (struct loop *loop,
3030                      struct iv_use *use, struct iv_cand *cand, gimple at,
3031                      struct aff_tree *aff)
3032 {
3033   tree ubase = use->iv->base;
3034   tree ustep = use->iv->step;
3035   tree cbase = cand->iv->base;
3036   tree cstep = cand->iv->step, cstep_common;
3037   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3038   tree common_type, var;
3039   tree uutype;
3040   aff_tree cbase_aff, var_aff;
3041   double_int rat;
3042
3043   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3044     {
3045       /* We do not have a precision to express the values of use.  */
3046       return false;
3047     }
3048
3049   var = var_at_stmt (loop, cand, at);
3050   uutype = unsigned_type_for (utype);
3051
3052   /* If the conversion is not noop, perform it.  */
3053   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3054     {
3055       cstep = fold_convert (uutype, cstep);
3056       cbase = fold_convert (uutype, cbase);
3057       var = fold_convert (uutype, var);
3058     }
3059
3060   if (!constant_multiple_of (ustep, cstep, &rat))
3061     return false;
3062
3063   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3064      type, we achieve better folding by computing their difference in this
3065      wider type, and cast the result to UUTYPE.  We do not need to worry about
3066      overflows, as all the arithmetics will in the end be performed in UUTYPE
3067      anyway.  */
3068   common_type = determine_common_wider_type (&ubase, &cbase);
3069
3070   /* use = ubase - ratio * cbase + ratio * var.  */
3071   tree_to_aff_combination (ubase, common_type, aff);
3072   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3073   tree_to_aff_combination (var, uutype, &var_aff);
3074
3075   /* We need to shift the value if we are after the increment.  */
3076   if (stmt_after_increment (loop, cand, at))
3077     {
3078       aff_tree cstep_aff;
3079
3080       if (common_type != uutype)
3081         cstep_common = fold_convert (common_type, cstep);
3082       else
3083         cstep_common = cstep;
3084
3085       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3086       aff_combination_add (&cbase_aff, &cstep_aff);
3087     }
3088
3089   aff_combination_scale (&cbase_aff, -rat);
3090   aff_combination_add (aff, &cbase_aff);
3091   if (common_type != uutype)
3092     aff_combination_convert (aff, uutype);
3093
3094   aff_combination_scale (&var_aff, rat);
3095   aff_combination_add (aff, &var_aff);
3096
3097   return true;
3098 }
3099
3100 /* Return the type of USE.  */
3101
3102 static tree
3103 get_use_type (struct iv_use *use)
3104 {
3105   tree base_type = TREE_TYPE (use->iv->base);
3106   tree type;
3107
3108   if (use->type == USE_ADDRESS)
3109     {
3110       /* The base_type may be a void pointer.  Create a pointer type based on
3111          the mem_ref instead.  */
3112       type = build_pointer_type (TREE_TYPE (*use->op_p));
3113       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3114                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3115     }
3116   else
3117     type = base_type;
3118
3119   return type;
3120 }
3121
3122 /* Determines the expression by that USE is expressed from induction variable
3123    CAND at statement AT in LOOP.  The computation is unshared.  */
3124
3125 static tree
3126 get_computation_at (struct loop *loop,
3127                     struct iv_use *use, struct iv_cand *cand, gimple at)
3128 {
3129   aff_tree aff;
3130   tree type = get_use_type (use);
3131
3132   if (!get_computation_aff (loop, use, cand, at, &aff))
3133     return NULL_TREE;
3134   unshare_aff_combination (&aff);
3135   return fold_convert (type, aff_combination_to_tree (&aff));
3136 }
3137
3138 /* Determines the expression by that USE is expressed from induction variable
3139    CAND in LOOP.  The computation is unshared.  */
3140
3141 static tree
3142 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3143 {
3144   return get_computation_at (loop, use, cand, use->stmt);
3145 }
3146
3147 /* Adjust the cost COST for being in loop setup rather than loop body.
3148    If we're optimizing for space, the loop setup overhead is constant;
3149    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3150 static unsigned
3151 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3152 {
3153   if (cost == INFTY)
3154     return cost;
3155   else if (optimize_loop_for_speed_p (data->current_loop))
3156     return cost / avg_loop_niter (data->current_loop);
3157   else
3158     return cost;
3159 }
3160
3161 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3162    validity for a memory reference accessing memory of mode MODE in
3163    address space AS.  */
3164
3165
3166 bool
3167 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3168                                  addr_space_t as)
3169 {
3170 #define MAX_RATIO 128
3171   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3172   static vec<sbitmap> valid_mult_list;
3173   sbitmap valid_mult;
3174
3175   if (data_index >= valid_mult_list.length ())
3176     valid_mult_list.safe_grow_cleared (data_index + 1);
3177
3178   valid_mult = valid_mult_list[data_index];
3179   if (!valid_mult)
3180     {
3181       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3182       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3183       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3184       rtx addr, scaled;
3185       HOST_WIDE_INT i;
3186
3187       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3188       bitmap_clear (valid_mult);
3189       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3190       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3191       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3192         {
3193           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3194           if (memory_address_addr_space_p (mode, addr, as)
3195               || memory_address_addr_space_p (mode, scaled, as))
3196             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3197         }
3198
3199       if (dump_file && (dump_flags & TDF_DETAILS))
3200         {
3201           fprintf (dump_file, "  allowed multipliers:");
3202           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3203             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3204               fprintf (dump_file, " %d", (int) i);
3205           fprintf (dump_file, "\n");
3206           fprintf (dump_file, "\n");
3207         }
3208
3209       valid_mult_list[data_index] = valid_mult;
3210     }
3211
3212   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3213     return false;
3214
3215   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3216 }
3217
3218 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3219    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3220    variable is omitted.  Compute the cost for a memory reference that accesses
3221    a memory location of mode MEM_MODE in address space AS.
3222
3223    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3224    size of MEM_MODE / RATIO) is available.  To make this determination, we
3225    look at the size of the increment to be made, which is given in CSTEP.
3226    CSTEP may be zero if the step is unknown.
3227    STMT_AFTER_INC is true iff the statement we're looking at is after the
3228    increment of the original biv.
3229
3230    TODO -- there must be some better way.  This all is quite crude.  */
3231
3232 enum ainc_type
3233 {
3234   AINC_PRE_INC,         /* Pre increment.  */
3235   AINC_PRE_DEC,         /* Pre decrement.  */
3236   AINC_POST_INC,        /* Post increment.  */
3237   AINC_POST_DEC,        /* Post decrement.  */
3238   AINC_NONE             /* Also the number of auto increment types.  */
3239 };
3240
3241 typedef struct address_cost_data_s
3242 {
3243   HOST_WIDE_INT min_offset, max_offset;
3244   unsigned costs[2][2][2][2];
3245   unsigned ainc_costs[AINC_NONE];
3246 } *address_cost_data;
3247
3248
3249 static comp_cost
3250 get_address_cost (bool symbol_present, bool var_present,
3251                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3252                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3253                   addr_space_t as, bool speed,
3254                   bool stmt_after_inc, bool *may_autoinc)
3255 {
3256   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3257   static vec<address_cost_data> address_cost_data_list;
3258   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3259   address_cost_data data;
3260   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3261   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3262   unsigned cost, acost, complexity;
3263   enum ainc_type autoinc_type;
3264   bool offset_p, ratio_p, autoinc;
3265   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3266   unsigned HOST_WIDE_INT mask;
3267   unsigned bits;
3268
3269   if (data_index >= address_cost_data_list.length ())
3270     address_cost_data_list.safe_grow_cleared (data_index + 1);
3271
3272   data = address_cost_data_list[data_index];
3273   if (!data)
3274     {
3275       HOST_WIDE_INT i;
3276       HOST_WIDE_INT rat, off = 0;
3277       int old_cse_not_expected, width;
3278       unsigned sym_p, var_p, off_p, rat_p, add_c;
3279       rtx seq, addr, base;
3280       rtx reg0, reg1;
3281
3282       data = (address_cost_data) xcalloc (1, sizeof (*data));
3283
3284       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3285
3286       width = GET_MODE_BITSIZE (address_mode) - 1;
3287       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3288         width = HOST_BITS_PER_WIDE_INT - 1;
3289       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3290
3291       for (i = width; i >= 0; i--)
3292         {
3293           off = -((unsigned HOST_WIDE_INT) 1 << i);
3294           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3295           if (memory_address_addr_space_p (mem_mode, addr, as))
3296             break;
3297         }
3298       data->min_offset = (i == -1? 0 : off);
3299
3300       for (i = width; i >= 0; i--)
3301         {
3302           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3303           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3304           if (memory_address_addr_space_p (mem_mode, addr, as))
3305             break;
3306         }
3307       if (i == -1)
3308         off = 0;
3309       data->max_offset = off;
3310
3311       if (dump_file && (dump_flags & TDF_DETAILS))
3312         {
3313           fprintf (dump_file, "get_address_cost:\n");
3314           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3315                    GET_MODE_NAME (mem_mode),
3316                    data->min_offset);
3317           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3318                    GET_MODE_NAME (mem_mode),
3319                    data->max_offset);
3320         }
3321
3322       rat = 1;
3323       for (i = 2; i <= MAX_RATIO; i++)
3324         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3325           {
3326             rat = i;
3327             break;
3328           }
3329
3330       /* Compute the cost of various addressing modes.  */
3331       acost = 0;
3332       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3333       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3334
3335       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3336           || USE_STORE_PRE_DECREMENT (mem_mode))
3337         {
3338           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3339           has_predec[mem_mode]
3340             = memory_address_addr_space_p (mem_mode, addr, as);
3341
3342           if (has_predec[mem_mode])
3343             data->ainc_costs[AINC_PRE_DEC]
3344               = address_cost (addr, mem_mode, as, speed);
3345         }
3346       if (USE_LOAD_POST_DECREMENT (mem_mode)
3347           || USE_STORE_POST_DECREMENT (mem_mode))
3348         {
3349           addr = gen_rtx_POST_DEC (address_mode, reg0);
3350           has_postdec[mem_mode]
3351             = memory_address_addr_space_p (mem_mode, addr, as);
3352
3353           if (has_postdec[mem_mode])
3354             data->ainc_costs[AINC_POST_DEC]
3355               = address_cost (addr, mem_mode, as, speed);
3356         }
3357       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3358           || USE_STORE_PRE_DECREMENT (mem_mode))
3359         {
3360           addr = gen_rtx_PRE_INC (address_mode, reg0);
3361           has_preinc[mem_mode]
3362             = memory_address_addr_space_p (mem_mode, addr, as);
3363
3364           if (has_preinc[mem_mode])
3365             data->ainc_costs[AINC_PRE_INC]
3366               = address_cost (addr, mem_mode, as, speed);
3367         }
3368       if (USE_LOAD_POST_INCREMENT (mem_mode)
3369           || USE_STORE_POST_INCREMENT (mem_mode))
3370         {
3371           addr = gen_rtx_POST_INC (address_mode, reg0);
3372           has_postinc[mem_mode]
3373             = memory_address_addr_space_p (mem_mode, addr, as);
3374
3375           if (has_postinc[mem_mode])
3376             data->ainc_costs[AINC_POST_INC]
3377               = address_cost (addr, mem_mode, as, speed);
3378         }
3379       for (i = 0; i < 16; i++)
3380         {
3381           sym_p = i & 1;
3382           var_p = (i >> 1) & 1;
3383           off_p = (i >> 2) & 1;
3384           rat_p = (i >> 3) & 1;
3385
3386           addr = reg0;
3387           if (rat_p)
3388             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3389                                    gen_int_mode (rat, address_mode));
3390
3391           if (var_p)
3392             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3393
3394           if (sym_p)
3395             {
3396               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3397               /* ??? We can run into trouble with some backends by presenting
3398                  it with symbols which haven't been properly passed through
3399                  targetm.encode_section_info.  By setting the local bit, we
3400                  enhance the probability of things working.  */
3401               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3402
3403               if (off_p)
3404                 base = gen_rtx_fmt_e (CONST, address_mode,
3405                                       gen_rtx_fmt_ee
3406                                         (PLUS, address_mode, base,
3407                                          gen_int_mode (off, address_mode)));
3408             }
3409           else if (off_p)
3410             base = gen_int_mode (off, address_mode);
3411           else
3412             base = NULL_RTX;
3413
3414           if (base)
3415             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3416
3417           start_sequence ();
3418           /* To avoid splitting addressing modes, pretend that no cse will
3419              follow.  */
3420           old_cse_not_expected = cse_not_expected;
3421           cse_not_expected = true;
3422           addr = memory_address_addr_space (mem_mode, addr, as);
3423           cse_not_expected = old_cse_not_expected;
3424           seq = get_insns ();
3425           end_sequence ();
3426
3427           acost = seq_cost (seq, speed);
3428           acost += address_cost (addr, mem_mode, as, speed);
3429
3430           if (!acost)
3431             acost = 1;
3432           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3433         }
3434
3435       /* On some targets, it is quite expensive to load symbol to a register,
3436          which makes addresses that contain symbols look much more expensive.
3437          However, the symbol will have to be loaded in any case before the
3438          loop (and quite likely we have it in register already), so it does not
3439          make much sense to penalize them too heavily.  So make some final
3440          tweaks for the SYMBOL_PRESENT modes:
3441
3442          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3443          var is cheaper, use this mode with small penalty.
3444          If VAR_PRESENT is true, try whether the mode with
3445          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3446          if this is the case, use it.  */
3447       add_c = add_cost (speed, address_mode);
3448       for (i = 0; i < 8; i++)
3449         {
3450           var_p = i & 1;
3451           off_p = (i >> 1) & 1;
3452           rat_p = (i >> 2) & 1;
3453
3454           acost = data->costs[0][1][off_p][rat_p] + 1;
3455           if (var_p)
3456             acost += add_c;
3457
3458           if (acost < data->costs[1][var_p][off_p][rat_p])
3459             data->costs[1][var_p][off_p][rat_p] = acost;
3460         }
3461
3462       if (dump_file && (dump_flags & TDF_DETAILS))
3463         {
3464           fprintf (dump_file, "Address costs:\n");
3465
3466           for (i = 0; i < 16; i++)
3467             {
3468               sym_p = i & 1;
3469               var_p = (i >> 1) & 1;
3470               off_p = (i >> 2) & 1;
3471               rat_p = (i >> 3) & 1;
3472
3473               fprintf (dump_file, "  ");
3474               if (sym_p)
3475                 fprintf (dump_file, "sym + ");
3476               if (var_p)
3477                 fprintf (dump_file, "var + ");
3478               if (off_p)
3479                 fprintf (dump_file, "cst + ");
3480               if (rat_p)
3481                 fprintf (dump_file, "rat * ");
3482
3483               acost = data->costs[sym_p][var_p][off_p][rat_p];
3484               fprintf (dump_file, "index costs %d\n", acost);
3485             }
3486           if (has_predec[mem_mode] || has_postdec[mem_mode]
3487               || has_preinc[mem_mode] || has_postinc[mem_mode])
3488             fprintf (dump_file, "  May include autoinc/dec\n");
3489           fprintf (dump_file, "\n");
3490         }
3491
3492       address_cost_data_list[data_index] = data;
3493     }
3494
3495   bits = GET_MODE_BITSIZE (address_mode);
3496   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3497   offset &= mask;
3498   if ((offset >> (bits - 1) & 1))
3499     offset |= ~mask;
3500   s_offset = offset;
3501
3502   autoinc = false;
3503   autoinc_type = AINC_NONE;
3504   msize = GET_MODE_SIZE (mem_mode);
3505   autoinc_offset = offset;
3506   if (stmt_after_inc)
3507     autoinc_offset += ratio * cstep;
3508   if (symbol_present || var_present || ratio != 1)
3509     autoinc = false;
3510   else
3511     {
3512       if (has_postinc[mem_mode] && autoinc_offset == 0
3513           && msize == cstep)
3514         autoinc_type = AINC_POST_INC;
3515       else if (has_postdec[mem_mode] && autoinc_offset == 0
3516                && msize == -cstep)
3517         autoinc_type = AINC_POST_DEC;
3518       else if (has_preinc[mem_mode] && autoinc_offset == msize
3519                && msize == cstep)
3520         autoinc_type = AINC_PRE_INC;
3521       else if (has_predec[mem_mode] && autoinc_offset == -msize
3522                && msize == -cstep)
3523         autoinc_type = AINC_PRE_DEC;
3524
3525       if (autoinc_type != AINC_NONE)
3526         autoinc = true;
3527     }
3528
3529   cost = 0;
3530   offset_p = (s_offset != 0
3531               && data->min_offset <= s_offset
3532               && s_offset <= data->max_offset);
3533   ratio_p = (ratio != 1
3534              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3535
3536   if (ratio != 1 && !ratio_p)
3537     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3538
3539   if (s_offset && !offset_p && !symbol_present)
3540     cost += add_cost (speed, address_mode);
3541
3542   if (may_autoinc)
3543     *may_autoinc = autoinc;
3544   if (autoinc)
3545     acost = data->ainc_costs[autoinc_type];
3546   else
3547     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3548   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3549   return new_cost (cost + acost, complexity);
3550 }
3551
3552  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3553     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3554     calculating the operands of EXPR.  Returns true if successful, and returns
3555     the cost in COST.  */
3556
3557 static bool
3558 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3559                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3560 {
3561   comp_cost res;
3562   tree op1 = TREE_OPERAND (expr, 1);
3563   tree cst = TREE_OPERAND (mult, 1);
3564   tree multop = TREE_OPERAND (mult, 0);
3565   int m = exact_log2 (int_cst_value (cst));
3566   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3567   int sa_cost;
3568   bool equal_p = false;
3569
3570   if (!(m >= 0 && m < maxm))
3571     return false;
3572
3573   if (operand_equal_p (op1, mult, 0))
3574     equal_p = true;
3575
3576   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3577              ? shiftadd_cost (speed, mode, m)
3578              : (equal_p
3579                 ? shiftsub1_cost (speed, mode, m)
3580                 : shiftsub0_cost (speed, mode, m)));
3581   res = new_cost (sa_cost, 0);
3582   res = add_costs (res, equal_p ? cost0 : cost1);
3583
3584   STRIP_NOPS (multop);
3585   if (!is_gimple_val (multop))
3586     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3587
3588   *cost = res;
3589   return true;
3590 }
3591
3592 /* Estimates cost of forcing expression EXPR into a variable.  */
3593
3594 static comp_cost
3595 force_expr_to_var_cost (tree expr, bool speed)
3596 {
3597   static bool costs_initialized = false;
3598   static unsigned integer_cost [2];
3599   static unsigned symbol_cost [2];
3600   static unsigned address_cost [2];
3601   tree op0, op1;
3602   comp_cost cost0, cost1, cost;
3603   enum machine_mode mode;
3604
3605   if (!costs_initialized)
3606     {
3607       tree type = build_pointer_type (integer_type_node);
3608       tree var, addr;
3609       rtx x;
3610       int i;
3611
3612       var = create_tmp_var_raw (integer_type_node, "test_var");
3613       TREE_STATIC (var) = 1;
3614       x = produce_memory_decl_rtl (var, NULL);
3615       SET_DECL_RTL (var, x);
3616
3617       addr = build1 (ADDR_EXPR, type, var);
3618
3619
3620       for (i = 0; i < 2; i++)
3621         {
3622           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3623                                                              2000), i);
3624
3625           symbol_cost[i] = computation_cost (addr, i) + 1;
3626
3627           address_cost[i]
3628             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3629           if (dump_file && (dump_flags & TDF_DETAILS))
3630             {
3631               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3632               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3633               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3634               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3635               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3636               fprintf (dump_file, "\n");
3637             }
3638         }
3639
3640       costs_initialized = true;
3641     }
3642
3643   STRIP_NOPS (expr);
3644
3645   if (SSA_VAR_P (expr))
3646     return no_cost;
3647
3648   if (is_gimple_min_invariant (expr))
3649     {
3650       if (TREE_CODE (expr) == INTEGER_CST)
3651         return new_cost (integer_cost [speed], 0);
3652
3653       if (TREE_CODE (expr) == ADDR_EXPR)
3654         {
3655           tree obj = TREE_OPERAND (expr, 0);
3656
3657           if (TREE_CODE (obj) == VAR_DECL
3658               || TREE_CODE (obj) == PARM_DECL
3659               || TREE_CODE (obj) == RESULT_DECL)
3660             return new_cost (symbol_cost [speed], 0);
3661         }
3662
3663       return new_cost (address_cost [speed], 0);
3664     }
3665
3666   switch (TREE_CODE (expr))
3667     {
3668     case POINTER_PLUS_EXPR:
3669     case PLUS_EXPR:
3670     case MINUS_EXPR:
3671     case MULT_EXPR:
3672       op0 = TREE_OPERAND (expr, 0);
3673       op1 = TREE_OPERAND (expr, 1);
3674       STRIP_NOPS (op0);
3675       STRIP_NOPS (op1);
3676       break;
3677
3678     CASE_CONVERT:
3679     case NEGATE_EXPR:
3680       op0 = TREE_OPERAND (expr, 0);
3681       STRIP_NOPS (op0);
3682       op1 = NULL_TREE;
3683       break;
3684
3685     default:
3686       /* Just an arbitrary value, FIXME.  */
3687       return new_cost (target_spill_cost[speed], 0);
3688     }
3689
3690   if (op0 == NULL_TREE
3691       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3692     cost0 = no_cost;
3693   else
3694     cost0 = force_expr_to_var_cost (op0, speed);
3695
3696   if (op1 == NULL_TREE
3697       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3698     cost1 = no_cost;
3699   else
3700     cost1 = force_expr_to_var_cost (op1, speed);
3701
3702   mode = TYPE_MODE (TREE_TYPE (expr));
3703   switch (TREE_CODE (expr))
3704     {
3705     case POINTER_PLUS_EXPR:
3706     case PLUS_EXPR:
3707     case MINUS_EXPR:
3708     case NEGATE_EXPR:
3709       cost = new_cost (add_cost (speed, mode), 0);
3710       if (TREE_CODE (expr) != NEGATE_EXPR)
3711         {
3712           tree mult = NULL_TREE;
3713           comp_cost sa_cost;
3714           if (TREE_CODE (op1) == MULT_EXPR)
3715             mult = op1;
3716           else if (TREE_CODE (op0) == MULT_EXPR)
3717             mult = op0;
3718
3719           if (mult != NULL_TREE
3720               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3721               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3722                                     speed, &sa_cost))
3723             return sa_cost;
3724         }
3725       break;
3726
3727     CASE_CONVERT:
3728       {
3729         tree inner_mode, outer_mode;
3730         outer_mode = TREE_TYPE (expr);
3731         inner_mode = TREE_TYPE (op0);
3732         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3733                                        TYPE_MODE (inner_mode), speed), 0);
3734       }
3735       break;
3736
3737     case MULT_EXPR:
3738       if (cst_and_fits_in_hwi (op0))
3739         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3740                                              mode, speed), 0);
3741       else if (cst_and_fits_in_hwi (op1))
3742         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3743                                              mode, speed), 0);
3744       else
3745         return new_cost (target_spill_cost [speed], 0);
3746       break;
3747
3748     default:
3749       gcc_unreachable ();
3750     }
3751
3752   cost = add_costs (cost, cost0);
3753   cost = add_costs (cost, cost1);
3754
3755   /* Bound the cost by target_spill_cost.  The parts of complicated
3756      computations often are either loop invariant or at least can
3757      be shared between several iv uses, so letting this grow without
3758      limits would not give reasonable results.  */
3759   if (cost.cost > (int) target_spill_cost [speed])
3760     cost.cost = target_spill_cost [speed];
3761
3762   return cost;
3763 }
3764
3765 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3766    invariants the computation depends on.  */
3767
3768 static comp_cost
3769 force_var_cost (struct ivopts_data *data,
3770                 tree expr, bitmap *depends_on)
3771 {
3772   if (depends_on)
3773     {
3774       fd_ivopts_data = data;
3775       walk_tree (&expr, find_depends, depends_on, NULL);
3776     }
3777
3778   return force_expr_to_var_cost (expr, data->speed);
3779 }
3780
3781 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3782    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3783    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3784    invariants the computation depends on.  */
3785
3786 static comp_cost
3787 split_address_cost (struct ivopts_data *data,
3788                     tree addr, bool *symbol_present, bool *var_present,
3789                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3790 {
3791   tree core;
3792   HOST_WIDE_INT bitsize;
3793   HOST_WIDE_INT bitpos;
3794   tree toffset;
3795   enum machine_mode mode;
3796   int unsignedp, volatilep;
3797
3798   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3799                               &unsignedp, &volatilep, false);
3800
3801   if (toffset != 0
3802       || bitpos % BITS_PER_UNIT != 0
3803       || TREE_CODE (core) != VAR_DECL)
3804     {
3805       *symbol_present = false;
3806       *var_present = true;
3807       fd_ivopts_data = data;
3808       walk_tree (&addr, find_depends, depends_on, NULL);
3809       return new_cost (target_spill_cost[data->speed], 0);
3810     }
3811
3812   *offset += bitpos / BITS_PER_UNIT;
3813   if (TREE_STATIC (core)
3814       || DECL_EXTERNAL (core))
3815     {
3816       *symbol_present = true;
3817       *var_present = false;
3818       return no_cost;
3819     }
3820
3821   *symbol_present = false;
3822   *var_present = true;
3823   return no_cost;
3824 }
3825
3826 /* Estimates cost of expressing difference of addresses E1 - E2 as
3827    var + symbol + offset.  The value of offset is added to OFFSET,
3828    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3829    part is missing.  DEPENDS_ON is a set of the invariants the computation
3830    depends on.  */
3831
3832 static comp_cost
3833 ptr_difference_cost (struct ivopts_data *data,
3834                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3835                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3836 {
3837   HOST_WIDE_INT diff = 0;
3838   aff_tree aff_e1, aff_e2;
3839   tree type;
3840
3841   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3842
3843   if (ptr_difference_const (e1, e2, &diff))
3844     {
3845       *offset += diff;
3846       *symbol_present = false;
3847       *var_present = false;
3848       return no_cost;
3849     }
3850
3851   if (integer_zerop (e2))
3852     return split_address_cost (data, TREE_OPERAND (e1, 0),
3853                                symbol_present, var_present, offset, depends_on);
3854
3855   *symbol_present = false;
3856   *var_present = true;
3857
3858   type = signed_type_for (TREE_TYPE (e1));
3859   tree_to_aff_combination (e1, type, &aff_e1);
3860   tree_to_aff_combination (e2, type, &aff_e2);
3861   aff_combination_scale (&aff_e2, double_int_minus_one);
3862   aff_combination_add (&aff_e1, &aff_e2);
3863
3864   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3865 }
3866
3867 /* Estimates cost of expressing difference E1 - E2 as
3868    var + symbol + offset.  The value of offset is added to OFFSET,
3869    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3870    part is missing.  DEPENDS_ON is a set of the invariants the computation
3871    depends on.  */
3872
3873 static comp_cost
3874 difference_cost (struct ivopts_data *data,
3875                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3876                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3877 {
3878   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3879   unsigned HOST_WIDE_INT off1, off2;
3880   aff_tree aff_e1, aff_e2;
3881   tree type;
3882
3883   e1 = strip_offset (e1, &off1);
3884   e2 = strip_offset (e2, &off2);
3885   *offset += off1 - off2;
3886
3887   STRIP_NOPS (e1);
3888   STRIP_NOPS (e2);
3889
3890   if (TREE_CODE (e1) == ADDR_EXPR)
3891     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3892                                 offset, depends_on);
3893   *symbol_present = false;
3894
3895   if (operand_equal_p (e1, e2, 0))
3896     {
3897       *var_present = false;
3898       return no_cost;
3899     }
3900
3901   *var_present = true;
3902
3903   if (integer_zerop (e2))
3904     return force_var_cost (data, e1, depends_on);
3905
3906   if (integer_zerop (e1))
3907     {
3908       comp_cost cost = force_var_cost (data, e2, depends_on);
3909       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3910       return cost;
3911     }
3912
3913   type = signed_type_for (TREE_TYPE (e1));
3914   tree_to_aff_combination (e1, type, &aff_e1);
3915   tree_to_aff_combination (e2, type, &aff_e2);
3916   aff_combination_scale (&aff_e2, double_int_minus_one);
3917   aff_combination_add (&aff_e1, &aff_e2);
3918
3919   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3920 }
3921
3922 /* Returns true if AFF1 and AFF2 are identical.  */
3923
3924 static bool
3925 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3926 {
3927   unsigned i;
3928
3929   if (aff1->n != aff2->n)
3930     return false;
3931
3932   for (i = 0; i < aff1->n; i++)
3933     {
3934       if (aff1->elts[i].coef != aff2->elts[i].coef)
3935         return false;
3936
3937       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3938         return false;
3939     }
3940   return true;
3941 }
3942
3943 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3944
3945 static int
3946 get_expr_id (struct ivopts_data *data, tree expr)
3947 {
3948   struct iv_inv_expr_ent ent;
3949   struct iv_inv_expr_ent **slot;
3950
3951   ent.expr = expr;
3952   ent.hash = iterative_hash_expr (expr, 0);
3953   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3954   if (*slot)
3955     return (*slot)->id;
3956
3957   *slot = XNEW (struct iv_inv_expr_ent);
3958   (*slot)->expr = expr;
3959   (*slot)->hash = ent.hash;
3960   (*slot)->id = data->inv_expr_id++;
3961   return (*slot)->id;
3962 }
3963
3964 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3965    requires a new compiler generated temporary.  Returns -1 otherwise.
3966    ADDRESS_P is a flag indicating if the expression is for address
3967    computation.  */
3968
3969 static int
3970 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3971                             tree cbase, HOST_WIDE_INT ratio,
3972                             bool address_p)
3973 {
3974   aff_tree ubase_aff, cbase_aff;
3975   tree expr, ub, cb;
3976
3977   STRIP_NOPS (ubase);
3978   STRIP_NOPS (cbase);
3979   ub = ubase;
3980   cb = cbase;
3981
3982   if ((TREE_CODE (ubase) == INTEGER_CST)
3983       && (TREE_CODE (cbase) == INTEGER_CST))
3984     return -1;
3985
3986   /* Strips the constant part. */
3987   if (TREE_CODE (ubase) == PLUS_EXPR
3988       || TREE_CODE (ubase) == MINUS_EXPR
3989       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3990     {
3991       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3992         ubase = TREE_OPERAND (ubase, 0);
3993     }
3994
3995   /* Strips the constant part. */
3996   if (TREE_CODE (cbase) == PLUS_EXPR
3997       || TREE_CODE (cbase) == MINUS_EXPR
3998       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3999     {
4000       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4001         cbase = TREE_OPERAND (cbase, 0);
4002     }
4003
4004   if (address_p)
4005     {
4006       if (((TREE_CODE (ubase) == SSA_NAME)
4007            || (TREE_CODE (ubase) == ADDR_EXPR
4008                && is_gimple_min_invariant (ubase)))
4009           && (TREE_CODE (cbase) == INTEGER_CST))
4010         return -1;
4011
4012       if (((TREE_CODE (cbase) == SSA_NAME)
4013            || (TREE_CODE (cbase) == ADDR_EXPR
4014                && is_gimple_min_invariant (cbase)))
4015           && (TREE_CODE (ubase) == INTEGER_CST))
4016         return -1;
4017     }
4018
4019   if (ratio == 1)
4020     {
4021       if (operand_equal_p (ubase, cbase, 0))
4022         return -1;
4023
4024       if (TREE_CODE (ubase) == ADDR_EXPR
4025           && TREE_CODE (cbase) == ADDR_EXPR)
4026         {
4027           tree usym, csym;
4028
4029           usym = TREE_OPERAND (ubase, 0);
4030           csym = TREE_OPERAND (cbase, 0);
4031           if (TREE_CODE (usym) == ARRAY_REF)
4032             {
4033               tree ind = TREE_OPERAND (usym, 1);
4034               if (TREE_CODE (ind) == INTEGER_CST
4035                   && tree_fits_shwi_p (ind)
4036                   && tree_to_shwi (ind) == 0)
4037                 usym = TREE_OPERAND (usym, 0);
4038             }
4039           if (TREE_CODE (csym) == ARRAY_REF)
4040             {
4041               tree ind = TREE_OPERAND (csym, 1);
4042               if (TREE_CODE (ind) == INTEGER_CST
4043                   && tree_fits_shwi_p (ind)
4044                   && tree_to_shwi (ind) == 0)
4045                 csym = TREE_OPERAND (csym, 0);
4046             }
4047           if (operand_equal_p (usym, csym, 0))
4048             return -1;
4049         }
4050       /* Now do more complex comparison  */
4051       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4052       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4053       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4054         return -1;
4055     }
4056
4057   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4058   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4059
4060   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4061   aff_combination_add (&ubase_aff, &cbase_aff);
4062   expr = aff_combination_to_tree (&ubase_aff);
4063   return get_expr_id (data, expr);
4064 }
4065
4066
4067
4068 /* Determines the cost of the computation by that USE is expressed
4069    from induction variable CAND.  If ADDRESS_P is true, we just need
4070    to create an address from it, otherwise we want to get it into
4071    register.  A set of invariants we depend on is stored in
4072    DEPENDS_ON.  AT is the statement at that the value is computed.
4073    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4074    addressing is likely.  */
4075
4076 static comp_cost
4077 get_computation_cost_at (struct ivopts_data *data,
4078                          struct iv_use *use, struct iv_cand *cand,
4079                          bool address_p, bitmap *depends_on, gimple at,
4080                          bool *can_autoinc,
4081                          int *inv_expr_id)
4082 {
4083   tree ubase = use->iv->base, ustep = use->iv->step;
4084   tree cbase, cstep;
4085   tree utype = TREE_TYPE (ubase), ctype;
4086   unsigned HOST_WIDE_INT cstepi, offset = 0;
4087   HOST_WIDE_INT ratio, aratio;
4088   bool var_present, symbol_present, stmt_is_after_inc;
4089   comp_cost cost;
4090   double_int rat;
4091   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4092   enum machine_mode mem_mode = (address_p
4093                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4094                                 : VOIDmode);
4095
4096   *depends_on = NULL;
4097
4098   /* Only consider real candidates.  */
4099   if (!cand->iv)
4100     return infinite_cost;
4101
4102   cbase = cand->iv->base;
4103   cstep = cand->iv->step;
4104   ctype = TREE_TYPE (cbase);
4105
4106   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4107     {
4108       /* We do not have a precision to express the values of use.  */
4109       return infinite_cost;
4110     }
4111
4112   if (address_p
4113       || (use->iv->base_object
4114           && cand->iv->base_object
4115           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4116           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4117     {
4118       /* Do not try to express address of an object with computation based
4119          on address of a different object.  This may cause problems in rtl
4120          level alias analysis (that does not expect this to be happening,
4121          as this is illegal in C), and would be unlikely to be useful
4122          anyway.  */
4123       if (use->iv->base_object
4124           && cand->iv->base_object
4125           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4126         return infinite_cost;
4127     }
4128
4129   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4130     {
4131       /* TODO -- add direct handling of this case.  */
4132       goto fallback;
4133     }
4134
4135   /* CSTEPI is removed from the offset in case statement is after the
4136      increment.  If the step is not constant, we use zero instead.
4137      This is a bit imprecise (there is the extra addition), but
4138      redundancy elimination is likely to transform the code so that
4139      it uses value of the variable before increment anyway,
4140      so it is not that much unrealistic.  */
4141   if (cst_and_fits_in_hwi (cstep))
4142     cstepi = int_cst_value (cstep);
4143   else
4144     cstepi = 0;
4145
4146   if (!constant_multiple_of (ustep, cstep, &rat))
4147     return infinite_cost;
4148
4149   if (rat.fits_shwi ())
4150     ratio = rat.to_shwi ();
4151   else
4152     return infinite_cost;
4153
4154   STRIP_NOPS (cbase);
4155   ctype = TREE_TYPE (cbase);
4156
4157   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4158
4159   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4160      or ratio == 1, it is better to handle this like
4161
4162      ubase - ratio * cbase + ratio * var
4163
4164      (also holds in the case ratio == -1, TODO.  */
4165
4166   if (cst_and_fits_in_hwi (cbase))
4167     {
4168       offset = - ratio * int_cst_value (cbase);
4169       cost = difference_cost (data,
4170                               ubase, build_int_cst (utype, 0),
4171                               &symbol_present, &var_present, &offset,
4172                               depends_on);
4173       cost.cost /= avg_loop_niter (data->current_loop);
4174     }
4175   else if (ratio == 1)
4176     {
4177       tree real_cbase = cbase;
4178
4179       /* Check to see if any adjustment is needed.  */
4180       if (cstepi == 0 && stmt_is_after_inc)
4181         {
4182           aff_tree real_cbase_aff;
4183           aff_tree cstep_aff;
4184
4185           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4186                                    &real_cbase_aff);
4187           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4188
4189           aff_combination_add (&real_cbase_aff, &cstep_aff);
4190           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4191         }
4192
4193       cost = difference_cost (data,
4194                               ubase, real_cbase,
4195                               &symbol_present, &var_present, &offset,
4196                               depends_on);
4197       cost.cost /= avg_loop_niter (data->current_loop);
4198     }
4199   else if (address_p
4200            && !POINTER_TYPE_P (ctype)
4201            && multiplier_allowed_in_address_p
4202                 (ratio, mem_mode,
4203                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4204     {
4205       cbase
4206         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4207       cost = difference_cost (data,
4208                               ubase, cbase,
4209                               &symbol_present, &var_present, &offset,
4210                               depends_on);
4211       cost.cost /= avg_loop_niter (data->current_loop);
4212     }
4213   else
4214     {
4215       cost = force_var_cost (data, cbase, depends_on);
4216       cost = add_costs (cost,
4217                         difference_cost (data,
4218                                          ubase, build_int_cst (utype, 0),
4219                                          &symbol_present, &var_present,
4220                                          &offset, depends_on));
4221       cost.cost /= avg_loop_niter (data->current_loop);
4222       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4223     }
4224
4225   if (inv_expr_id)
4226     {
4227       *inv_expr_id =
4228           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4229       /* Clear depends on.  */
4230       if (*inv_expr_id != -1 && depends_on && *depends_on)
4231         bitmap_clear (*depends_on);
4232     }
4233
4234   /* If we are after the increment, the value of the candidate is higher by
4235      one iteration.  */
4236   if (stmt_is_after_inc)
4237     offset -= ratio * cstepi;
4238
4239   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4240      (symbol/var1/const parts may be omitted).  If we are looking for an
4241      address, find the cost of addressing this.  */
4242   if (address_p)
4243     return add_costs (cost,
4244                       get_address_cost (symbol_present, var_present,
4245                                         offset, ratio, cstepi,
4246                                         mem_mode,
4247                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4248                                         speed, stmt_is_after_inc,
4249                                         can_autoinc));
4250
4251   /* Otherwise estimate the costs for computing the expression.  */
4252   if (!symbol_present && !var_present && !offset)
4253     {
4254       if (ratio != 1)
4255         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4256       return cost;
4257     }
4258
4259   /* Symbol + offset should be compile-time computable so consider that they
4260       are added once to the variable, if present.  */
4261   if (var_present && (symbol_present || offset))
4262     cost.cost += adjust_setup_cost (data,
4263                                     add_cost (speed, TYPE_MODE (ctype)));
4264
4265   /* Having offset does not affect runtime cost in case it is added to
4266      symbol, but it increases complexity.  */
4267   if (offset)
4268     cost.complexity++;
4269
4270   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4271
4272   aratio = ratio > 0 ? ratio : -ratio;
4273   if (aratio != 1)
4274     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4275   return cost;
4276
4277 fallback:
4278   if (can_autoinc)
4279     *can_autoinc = false;
4280
4281   {
4282     /* Just get the expression, expand it and measure the cost.  */
4283     tree comp = get_computation_at (data->current_loop, use, cand, at);
4284
4285     if (!comp)
4286       return infinite_cost;
4287
4288     if (address_p)
4289       comp = build_simple_mem_ref (comp);
4290
4291     return new_cost (computation_cost (comp, speed), 0);
4292   }
4293 }
4294
4295 /* Determines the cost of the computation by that USE is expressed
4296    from induction variable CAND.  If ADDRESS_P is true, we just need
4297    to create an address from it, otherwise we want to get it into
4298    register.  A set of invariants we depend on is stored in
4299    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4300    autoinc addressing is likely.  */
4301
4302 static comp_cost
4303 get_computation_cost (struct ivopts_data *data,
4304                       struct iv_use *use, struct iv_cand *cand,
4305                       bool address_p, bitmap *depends_on,
4306                       bool *can_autoinc, int *inv_expr_id)
4307 {
4308   return get_computation_cost_at (data,
4309                                   use, cand, address_p, depends_on, use->stmt,
4310                                   can_autoinc, inv_expr_id);
4311 }
4312
4313 /* Determines cost of basing replacement of USE on CAND in a generic
4314    expression.  */
4315
4316 static bool
4317 determine_use_iv_cost_generic (struct ivopts_data *data,
4318                                struct iv_use *use, struct iv_cand *cand)
4319 {
4320   bitmap depends_on;
4321   comp_cost cost;
4322   int inv_expr_id = -1;
4323
4324   /* The simple case first -- if we need to express value of the preserved
4325      original biv, the cost is 0.  This also prevents us from counting the
4326      cost of increment twice -- once at this use and once in the cost of
4327      the candidate.  */
4328   if (cand->pos == IP_ORIGINAL
4329       && cand->incremented_at == use->stmt)
4330     {
4331       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4332                        ERROR_MARK, -1);
4333       return true;
4334     }
4335
4336   cost = get_computation_cost (data, use, cand, false, &depends_on,
4337                                NULL, &inv_expr_id);
4338
4339   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4340                    inv_expr_id);
4341
4342   return !infinite_cost_p (cost);
4343 }
4344
4345 /* Determines cost of basing replacement of USE on CAND in an address.  */
4346
4347 static bool
4348 determine_use_iv_cost_address (struct ivopts_data *data,
4349                                struct iv_use *use, struct iv_cand *cand)
4350 {
4351   bitmap depends_on;
4352   bool can_autoinc;
4353   int inv_expr_id = -1;
4354   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4355                                          &can_autoinc, &inv_expr_id);
4356
4357   if (cand->ainc_use == use)
4358     {
4359       if (can_autoinc)
4360         cost.cost -= cand->cost_step;
4361       /* If we generated the candidate solely for exploiting autoincrement
4362          opportunities, and it turns out it can't be used, set the cost to
4363          infinity to make sure we ignore it.  */
4364       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4365         cost = infinite_cost;
4366     }
4367   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4368                    inv_expr_id);
4369
4370   return !infinite_cost_p (cost);
4371 }
4372
4373 /* Computes value of candidate CAND at position AT in iteration NITER, and
4374    stores it to VAL.  */
4375
4376 static void
4377 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4378                aff_tree *val)
4379 {
4380   aff_tree step, delta, nit;
4381   struct iv *iv = cand->iv;
4382   tree type = TREE_TYPE (iv->base);
4383   tree steptype = type;
4384   if (POINTER_TYPE_P (type))
4385     steptype = sizetype;
4386
4387   tree_to_aff_combination (iv->step, steptype, &step);
4388   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4389   aff_combination_convert (&nit, steptype);
4390   aff_combination_mult (&nit, &step, &delta);
4391   if (stmt_after_increment (loop, cand, at))
4392     aff_combination_add (&delta, &step);
4393
4394   tree_to_aff_combination (iv->base, type, val);
4395   aff_combination_add (val, &delta);
4396 }
4397
4398 /* Returns period of induction variable iv.  */
4399
4400 static tree
4401 iv_period (struct iv *iv)
4402 {
4403   tree step = iv->step, period, type;
4404   tree pow2div;
4405
4406   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4407
4408   type = unsigned_type_for (TREE_TYPE (step));
4409   /* Period of the iv is lcm (step, type_range)/step -1,
4410      i.e., N*type_range/step - 1. Since type range is power
4411      of two, N == (step >> num_of_ending_zeros_binary (step),
4412      so the final result is
4413
4414        (type_range >> num_of_ending_zeros_binary (step)) - 1
4415
4416   */
4417   pow2div = num_ending_zeros (step);
4418
4419   period = build_low_bits_mask (type,
4420                                 (TYPE_PRECISION (type)
4421                                  - tree_to_uhwi (pow2div)));
4422
4423   return period;
4424 }
4425
4426 /* Returns the comparison operator used when eliminating the iv USE.  */
4427
4428 static enum tree_code
4429 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4430 {
4431   struct loop *loop = data->current_loop;
4432   basic_block ex_bb;
4433   edge exit;
4434
4435   ex_bb = gimple_bb (use->stmt);
4436   exit = EDGE_SUCC (ex_bb, 0);
4437   if (flow_bb_inside_loop_p (loop, exit->dest))
4438     exit = EDGE_SUCC (ex_bb, 1);
4439
4440   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4441 }
4442
4443 static tree
4444 strip_wrap_conserving_type_conversions (tree exp)
4445 {
4446   while (tree_ssa_useless_type_conversion (exp)
4447          && (nowrap_type_p (TREE_TYPE (exp))
4448              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4449     exp = TREE_OPERAND (exp, 0);
4450   return exp;
4451 }
4452
4453 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4454    check for an exact match.  */
4455
4456 static bool
4457 expr_equal_p (tree e, tree what)
4458 {
4459   gimple stmt;
4460   enum tree_code code;
4461
4462   e = strip_wrap_conserving_type_conversions (e);
4463   what = strip_wrap_conserving_type_conversions (what);
4464
4465   code = TREE_CODE (what);
4466   if (TREE_TYPE (e) != TREE_TYPE (what))
4467     return false;
4468
4469   if (operand_equal_p (e, what, 0))
4470     return true;
4471
4472   if (TREE_CODE (e) != SSA_NAME)
4473     return false;
4474
4475   stmt = SSA_NAME_DEF_STMT (e);
4476   if (gimple_code (stmt) != GIMPLE_ASSIGN
4477       || gimple_assign_rhs_code (stmt) != code)
4478     return false;
4479
4480   switch (get_gimple_rhs_class (code))
4481     {
4482     case GIMPLE_BINARY_RHS:
4483       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4484         return false;
4485       /* Fallthru.  */
4486
4487     case GIMPLE_UNARY_RHS:
4488     case GIMPLE_SINGLE_RHS:
4489       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4490     default:
4491       return false;
4492     }
4493 }
4494
4495 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4496    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4497    calculation is performed in non-wrapping type.
4498
4499    TODO: More generally, we could test for the situation that
4500          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4501          This would require knowing the sign of OFFSET.
4502
4503          Also, we only look for the first addition in the computation of BASE.
4504          More complex analysis would be better, but introducing it just for
4505          this optimization seems like an overkill.  */
4506
4507 static bool
4508 difference_cannot_overflow_p (tree base, tree offset)
4509 {
4510   enum tree_code code;
4511   tree e1, e2;
4512
4513   if (!nowrap_type_p (TREE_TYPE (base)))
4514     return false;
4515
4516   base = expand_simple_operations (base);
4517
4518   if (TREE_CODE (base) == SSA_NAME)
4519     {
4520       gimple stmt = SSA_NAME_DEF_STMT (base);
4521
4522       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4523         return false;
4524
4525       code = gimple_assign_rhs_code (stmt);
4526       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4527         return false;
4528
4529       e1 = gimple_assign_rhs1 (stmt);
4530       e2 = gimple_assign_rhs2 (stmt);
4531     }
4532   else
4533     {
4534       code = TREE_CODE (base);
4535       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4536         return false;
4537       e1 = TREE_OPERAND (base, 0);
4538       e2 = TREE_OPERAND (base, 1);
4539     }
4540
4541   /* TODO: deeper inspection may be necessary to prove the equality.  */
4542   switch (code)
4543     {
4544     case PLUS_EXPR:
4545       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4546     case POINTER_PLUS_EXPR:
4547       return expr_equal_p (e2, offset);
4548
4549     default:
4550       return false;
4551     }
4552 }
4553
4554 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4555    comparison with CAND.  NITER describes the number of iterations of
4556    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4557
4558    We aim to handle the following situation:
4559
4560    sometype *base, *p;
4561    int a, b, i;
4562
4563    i = a;
4564    p = p_0 = base + a;
4565
4566    do
4567      {
4568        bla (*p);
4569        p++;
4570        i++;
4571      }
4572    while (i < b);
4573
4574    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4575    We aim to optimize this to
4576
4577    p = p_0 = base + a;
4578    do
4579      {
4580        bla (*p);
4581        p++;
4582      }
4583    while (p < p_0 - a + b);
4584
4585    This preserves the correctness, since the pointer arithmetics does not
4586    overflow.  More precisely:
4587
4588    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4589       overflow in computing it or the values of p.
4590    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4591       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4592
4593 static bool
4594 iv_elimination_compare_lt (struct ivopts_data *data,
4595                            struct iv_cand *cand, enum tree_code *comp_p,
4596                            struct tree_niter_desc *niter)
4597 {
4598   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4599   struct aff_tree nit, tmpa, tmpb;
4600   enum tree_code comp;
4601   HOST_WIDE_INT step;
4602
4603   /* We need to know that the candidate induction variable does not overflow.
4604      While more complex analysis may be used to prove this, for now just
4605      check that the variable appears in the original program and that it
4606      is computed in a type that guarantees no overflows.  */
4607   cand_type = TREE_TYPE (cand->iv->base);
4608   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4609     return false;
4610
4611   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4612      the calculation of the BOUND could overflow, making the comparison
4613      invalid.  */
4614   if (!data->loop_single_exit_p)
4615     return false;
4616
4617   /* We need to be able to decide whether candidate is increasing or decreasing
4618      in order to choose the right comparison operator.  */
4619   if (!cst_and_fits_in_hwi (cand->iv->step))
4620     return false;
4621   step = int_cst_value (cand->iv->step);
4622
4623   /* Check that the number of iterations matches the expected pattern:
4624      a + 1 > b ? 0 : b - a - 1.  */
4625   mbz = niter->may_be_zero;
4626   if (TREE_CODE (mbz) == GT_EXPR)
4627     {
4628       /* Handle a + 1 > b.  */
4629       tree op0 = TREE_OPERAND (mbz, 0);
4630       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4631         {
4632           a = TREE_OPERAND (op0, 0);
4633           b = TREE_OPERAND (mbz, 1);
4634         }
4635       else
4636         return false;
4637     }
4638   else if (TREE_CODE (mbz) == LT_EXPR)
4639     {
4640       tree op1 = TREE_OPERAND (mbz, 1);
4641
4642       /* Handle b < a + 1.  */
4643       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4644         {
4645           a = TREE_OPERAND (op1, 0);
4646           b = TREE_OPERAND (mbz, 0);
4647         }
4648       else
4649         return false;
4650     }
4651   else
4652     return false;
4653
4654   /* Expected number of iterations is B - A - 1.  Check that it matches
4655      the actual number, i.e., that B - A - NITER = 1.  */
4656   tree_to_aff_combination (niter->niter, nit_type, &nit);
4657   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4658   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4659   aff_combination_scale (&nit, double_int_minus_one);
4660   aff_combination_scale (&tmpa, double_int_minus_one);
4661   aff_combination_add (&tmpb, &tmpa);
4662   aff_combination_add (&tmpb, &nit);
4663   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4664     return false;
4665
4666   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4667      overflow.  */
4668   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4669                         cand->iv->step,
4670                         fold_convert (TREE_TYPE (cand->iv->step), a));
4671   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4672     return false;
4673
4674   /* Determine the new comparison operator.  */
4675   comp = step < 0 ? GT_EXPR : LT_EXPR;
4676   if (*comp_p == NE_EXPR)
4677     *comp_p = comp;
4678   else if (*comp_p == EQ_EXPR)
4679     *comp_p = invert_tree_comparison (comp, false);
4680   else
4681     gcc_unreachable ();
4682
4683   return true;
4684 }
4685
4686 /* Check whether it is possible to express the condition in USE by comparison
4687    of candidate CAND.  If so, store the value compared with to BOUND, and the
4688    comparison operator to COMP.  */
4689
4690 static bool
4691 may_eliminate_iv (struct ivopts_data *data,
4692                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4693                   enum tree_code *comp)
4694 {
4695   basic_block ex_bb;
4696   edge exit;
4697   tree period;
4698   struct loop *loop = data->current_loop;
4699   aff_tree bnd;
4700   struct tree_niter_desc *desc = NULL;
4701
4702   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4703     return false;
4704
4705   /* For now works only for exits that dominate the loop latch.
4706      TODO: extend to other conditions inside loop body.  */
4707   ex_bb = gimple_bb (use->stmt);
4708   if (use->stmt != last_stmt (ex_bb)
4709       || gimple_code (use->stmt) != GIMPLE_COND
4710       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4711     return false;
4712
4713   exit = EDGE_SUCC (ex_bb, 0);
4714   if (flow_bb_inside_loop_p (loop, exit->dest))
4715     exit = EDGE_SUCC (ex_bb, 1);
4716   if (flow_bb_inside_loop_p (loop, exit->dest))
4717     return false;
4718
4719   desc = niter_for_exit (data, exit);
4720   if (!desc)
4721     return false;
4722
4723   /* Determine whether we can use the variable to test the exit condition.
4724      This is the case iff the period of the induction variable is greater
4725      than the number of iterations for which the exit condition is true.  */
4726   period = iv_period (cand->iv);
4727
4728   /* If the number of iterations is constant, compare against it directly.  */
4729   if (TREE_CODE (desc->niter) == INTEGER_CST)
4730     {
4731       /* See cand_value_at.  */
4732       if (stmt_after_increment (loop, cand, use->stmt))
4733         {
4734           if (!tree_int_cst_lt (desc->niter, period))
4735             return false;
4736         }
4737       else
4738         {
4739           if (tree_int_cst_lt (period, desc->niter))
4740             return false;
4741         }
4742     }
4743
4744   /* If not, and if this is the only possible exit of the loop, see whether
4745      we can get a conservative estimate on the number of iterations of the
4746      entire loop and compare against that instead.  */
4747   else
4748     {
4749       double_int period_value, max_niter;
4750
4751       max_niter = desc->max;
4752       if (stmt_after_increment (loop, cand, use->stmt))
4753         max_niter += double_int_one;
4754       period_value = tree_to_double_int (period);
4755       if (max_niter.ugt (period_value))
4756         {
4757           /* See if we can take advantage of inferred loop bound information.  */
4758           if (data->loop_single_exit_p)
4759             {
4760               if (!max_loop_iterations (loop, &max_niter))
4761                 return false;
4762               /* The loop bound is already adjusted by adding 1.  */
4763               if (max_niter.ugt (period_value))
4764                 return false;
4765             }
4766           else
4767             return false;
4768         }
4769     }
4770
4771   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4772
4773   *bound = aff_combination_to_tree (&bnd);
4774   *comp = iv_elimination_compare (data, use);
4775
4776   /* It is unlikely that computing the number of iterations using division
4777      would be more profitable than keeping the original induction variable.  */
4778   if (expression_expensive_p (*bound))
4779     return false;
4780
4781   /* Sometimes, it is possible to handle the situation that the number of
4782      iterations may be zero unless additional assumtions by using <
4783      instead of != in the exit condition.
4784
4785      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4786            base the exit condition on it.  However, that is often too
4787            expensive.  */
4788   if (!integer_zerop (desc->may_be_zero))
4789     return iv_elimination_compare_lt (data, cand, comp, desc);
4790
4791   return true;
4792 }
4793
4794  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4795     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4796
4797 static int
4798 parm_decl_cost (struct ivopts_data *data, tree bound)
4799 {
4800   tree sbound = bound;
4801   STRIP_NOPS (sbound);
4802
4803   if (TREE_CODE (sbound) == SSA_NAME
4804       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4805       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4806       && data->body_includes_call)
4807     return COSTS_N_INSNS (1);
4808
4809   return 0;
4810 }
4811
4812 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4813
4814 static bool
4815 determine_use_iv_cost_condition (struct ivopts_data *data,
4816                                  struct iv_use *use, struct iv_cand *cand)
4817 {
4818   tree bound = NULL_TREE;
4819   struct iv *cmp_iv;
4820   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4821   comp_cost elim_cost, express_cost, cost, bound_cost;
4822   bool ok;
4823   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4824   tree *control_var, *bound_cst;
4825   enum tree_code comp = ERROR_MARK;
4826
4827   /* Only consider real candidates.  */
4828   if (!cand->iv)
4829     {
4830       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4831                        ERROR_MARK, -1);
4832       return false;
4833     }
4834
4835   /* Try iv elimination.  */
4836   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4837     {
4838       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4839       if (elim_cost.cost == 0)
4840         elim_cost.cost = parm_decl_cost (data, bound);
4841       else if (TREE_CODE (bound) == INTEGER_CST)
4842         elim_cost.cost = 0;
4843       /* If we replace a loop condition 'i < n' with 'p < base + n',
4844          depends_on_elim will have 'base' and 'n' set, which implies
4845          that both 'base' and 'n' will be live during the loop.  More likely,
4846          'base + n' will be loop invariant, resulting in only one live value
4847          during the loop.  So in that case we clear depends_on_elim and set
4848         elim_inv_expr_id instead.  */
4849       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4850         {
4851           elim_inv_expr_id = get_expr_id (data, bound);
4852           bitmap_clear (depends_on_elim);
4853         }
4854       /* The bound is a loop invariant, so it will be only computed
4855          once.  */
4856       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4857     }
4858   else
4859     elim_cost = infinite_cost;
4860
4861   /* Try expressing the original giv.  If it is compared with an invariant,
4862      note that we cannot get rid of it.  */
4863   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4864                               NULL, &cmp_iv);
4865   gcc_assert (ok);
4866
4867   /* When the condition is a comparison of the candidate IV against
4868      zero, prefer this IV.
4869
4870      TODO: The constant that we're subtracting from the cost should
4871      be target-dependent.  This information should be added to the
4872      target costs for each backend.  */
4873   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4874       && integer_zerop (*bound_cst)
4875       && (operand_equal_p (*control_var, cand->var_after, 0)
4876           || operand_equal_p (*control_var, cand->var_before, 0)))
4877     elim_cost.cost -= 1;
4878
4879   express_cost = get_computation_cost (data, use, cand, false,
4880                                        &depends_on_express, NULL,
4881                                        &express_inv_expr_id);
4882   fd_ivopts_data = data;
4883   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4884
4885   /* Count the cost of the original bound as well.  */
4886   bound_cost = force_var_cost (data, *bound_cst, NULL);
4887   if (bound_cost.cost == 0)
4888     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4889   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4890     bound_cost.cost = 0;
4891   express_cost.cost += bound_cost.cost;
4892
4893   /* Choose the better approach, preferring the eliminated IV. */
4894   if (compare_costs (elim_cost, express_cost) <= 0)
4895     {
4896       cost = elim_cost;
4897       depends_on = depends_on_elim;
4898       depends_on_elim = NULL;
4899       inv_expr_id = elim_inv_expr_id;
4900     }
4901   else
4902     {
4903       cost = express_cost;
4904       depends_on = depends_on_express;
4905       depends_on_express = NULL;
4906       bound = NULL_TREE;
4907       comp = ERROR_MARK;
4908       inv_expr_id = express_inv_expr_id;
4909     }
4910
4911   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4912
4913   if (depends_on_elim)
4914     BITMAP_FREE (depends_on_elim);
4915   if (depends_on_express)
4916     BITMAP_FREE (depends_on_express);
4917
4918   return !infinite_cost_p (cost);
4919 }
4920
4921 /* Determines cost of basing replacement of USE on CAND.  Returns false
4922    if USE cannot be based on CAND.  */
4923
4924 static bool
4925 determine_use_iv_cost (struct ivopts_data *data,
4926                        struct iv_use *use, struct iv_cand *cand)
4927 {
4928   switch (use->type)
4929     {
4930     case USE_NONLINEAR_EXPR:
4931       return determine_use_iv_cost_generic (data, use, cand);
4932
4933     case USE_ADDRESS:
4934       return determine_use_iv_cost_address (data, use, cand);
4935
4936     case USE_COMPARE:
4937       return determine_use_iv_cost_condition (data, use, cand);
4938
4939     default:
4940       gcc_unreachable ();
4941     }
4942 }
4943
4944 /* Return true if get_computation_cost indicates that autoincrement is
4945    a possibility for the pair of USE and CAND, false otherwise.  */
4946
4947 static bool
4948 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4949                            struct iv_cand *cand)
4950 {
4951   bitmap depends_on;
4952   bool can_autoinc;
4953   comp_cost cost;
4954
4955   if (use->type != USE_ADDRESS)
4956     return false;
4957
4958   cost = get_computation_cost (data, use, cand, true, &depends_on,
4959                                &can_autoinc, NULL);
4960
4961   BITMAP_FREE (depends_on);
4962
4963   return !infinite_cost_p (cost) && can_autoinc;
4964 }
4965
4966 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4967    use that allows autoincrement, and set their AINC_USE if possible.  */
4968
4969 static void
4970 set_autoinc_for_original_candidates (struct ivopts_data *data)
4971 {
4972   unsigned i, j;
4973
4974   for (i = 0; i < n_iv_cands (data); i++)
4975     {
4976       struct iv_cand *cand = iv_cand (data, i);
4977       struct iv_use *closest_before = NULL;
4978       struct iv_use *closest_after = NULL;
4979       if (cand->pos != IP_ORIGINAL)
4980         continue;
4981
4982       for (j = 0; j < n_iv_uses (data); j++)
4983         {
4984           struct iv_use *use = iv_use (data, j);
4985           unsigned uid = gimple_uid (use->stmt);
4986
4987           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4988             continue;
4989
4990           if (uid < gimple_uid (cand->incremented_at)
4991               && (closest_before == NULL
4992                   || uid > gimple_uid (closest_before->stmt)))
4993             closest_before = use;
4994
4995           if (uid > gimple_uid (cand->incremented_at)
4996               && (closest_after == NULL
4997                   || uid < gimple_uid (closest_after->stmt)))
4998             closest_after = use;
4999         }
5000
5001       if (closest_before != NULL
5002           && autoinc_possible_for_pair (data, closest_before, cand))
5003         cand->ainc_use = closest_before;
5004       else if (closest_after != NULL
5005                && autoinc_possible_for_pair (data, closest_after, cand))
5006         cand->ainc_use = closest_after;
5007     }
5008 }
5009
5010 /* Finds the candidates for the induction variables.  */
5011
5012 static void
5013 find_iv_candidates (struct ivopts_data *data)
5014 {
5015   /* Add commonly used ivs.  */
5016   add_standard_iv_candidates (data);
5017
5018   /* Add old induction variables.  */
5019   add_old_ivs_candidates (data);
5020
5021   /* Add induction variables derived from uses.  */
5022   add_derived_ivs_candidates (data);
5023
5024   set_autoinc_for_original_candidates (data);
5025
5026   /* Record the important candidates.  */
5027   record_important_candidates (data);
5028 }
5029
5030 /* Determines costs of basing the use of the iv on an iv candidate.  */
5031
5032 static void
5033 determine_use_iv_costs (struct ivopts_data *data)
5034 {
5035   unsigned i, j;
5036   struct iv_use *use;
5037   struct iv_cand *cand;
5038   bitmap to_clear = BITMAP_ALLOC (NULL);
5039
5040   alloc_use_cost_map (data);
5041
5042   for (i = 0; i < n_iv_uses (data); i++)
5043     {
5044       use = iv_use (data, i);
5045
5046       if (data->consider_all_candidates)
5047         {
5048           for (j = 0; j < n_iv_cands (data); j++)
5049             {
5050               cand = iv_cand (data, j);
5051               determine_use_iv_cost (data, use, cand);
5052             }
5053         }
5054       else
5055         {
5056           bitmap_iterator bi;
5057
5058           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5059             {
5060               cand = iv_cand (data, j);
5061               if (!determine_use_iv_cost (data, use, cand))
5062                 bitmap_set_bit (to_clear, j);
5063             }
5064
5065           /* Remove the candidates for that the cost is infinite from
5066              the list of related candidates.  */
5067           bitmap_and_compl_into (use->related_cands, to_clear);
5068           bitmap_clear (to_clear);
5069         }
5070     }
5071
5072   BITMAP_FREE (to_clear);
5073
5074   if (dump_file && (dump_flags & TDF_DETAILS))
5075     {
5076       fprintf (dump_file, "Use-candidate costs:\n");
5077
5078       for (i = 0; i < n_iv_uses (data); i++)
5079         {
5080           use = iv_use (data, i);
5081
5082           fprintf (dump_file, "Use %d:\n", i);
5083           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5084           for (j = 0; j < use->n_map_members; j++)
5085             {
5086               if (!use->cost_map[j].cand
5087                   || infinite_cost_p (use->cost_map[j].cost))
5088                 continue;
5089
5090               fprintf (dump_file, "  %d\t%d\t%d\t",
5091                        use->cost_map[j].cand->id,
5092                        use->cost_map[j].cost.cost,
5093                        use->cost_map[j].cost.complexity);
5094               if (use->cost_map[j].depends_on)
5095                 bitmap_print (dump_file,
5096                               use->cost_map[j].depends_on, "","");
5097               if (use->cost_map[j].inv_expr_id != -1)
5098                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5099               fprintf (dump_file, "\n");
5100             }
5101
5102           fprintf (dump_file, "\n");
5103         }
5104       fprintf (dump_file, "\n");
5105     }
5106 }
5107
5108 /* Determines cost of the candidate CAND.  */
5109
5110 static void
5111 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5112 {
5113   comp_cost cost_base;
5114   unsigned cost, cost_step;
5115   tree base;
5116
5117   if (!cand->iv)
5118     {
5119       cand->cost = 0;
5120       return;
5121     }
5122
5123   /* There are two costs associated with the candidate -- its increment
5124      and its initialization.  The second is almost negligible for any loop
5125      that rolls enough, so we take it just very little into account.  */
5126
5127   base = cand->iv->base;
5128   cost_base = force_var_cost (data, base, NULL);
5129   /* It will be exceptional that the iv register happens to be initialized with
5130      the proper value at no cost.  In general, there will at least be a regcopy
5131      or a const set.  */
5132   if (cost_base.cost == 0)
5133     cost_base.cost = COSTS_N_INSNS (1);
5134   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5135
5136   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5137
5138   /* Prefer the original ivs unless we may gain something by replacing it.
5139      The reason is to make debugging simpler; so this is not relevant for
5140      artificial ivs created by other optimization passes.  */
5141   if (cand->pos != IP_ORIGINAL
5142       || !SSA_NAME_VAR (cand->var_before)
5143       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5144     cost++;
5145
5146   /* Prefer not to insert statements into latch unless there are some
5147      already (so that we do not create unnecessary jumps).  */
5148   if (cand->pos == IP_END
5149       && empty_block_p (ip_end_pos (data->current_loop)))
5150     cost++;
5151
5152   cand->cost = cost;
5153   cand->cost_step = cost_step;
5154 }
5155
5156 /* Determines costs of computation of the candidates.  */
5157
5158 static void
5159 determine_iv_costs (struct ivopts_data *data)
5160 {
5161   unsigned i;
5162
5163   if (dump_file && (dump_flags & TDF_DETAILS))
5164     {
5165       fprintf (dump_file, "Candidate costs:\n");
5166       fprintf (dump_file, "  cand\tcost\n");
5167     }
5168
5169   for (i = 0; i < n_iv_cands (data); i++)
5170     {
5171       struct iv_cand *cand = iv_cand (data, i);
5172
5173       determine_iv_cost (data, cand);
5174
5175       if (dump_file && (dump_flags & TDF_DETAILS))
5176         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5177     }
5178
5179   if (dump_file && (dump_flags & TDF_DETAILS))
5180     fprintf (dump_file, "\n");
5181 }
5182
5183 /* Calculates cost for having SIZE induction variables.  */
5184
5185 static unsigned
5186 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5187 {
5188   /* We add size to the cost, so that we prefer eliminating ivs
5189      if possible.  */
5190   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5191                                             data->body_includes_call);
5192 }
5193
5194 /* For each size of the induction variable set determine the penalty.  */
5195
5196 static void
5197 determine_set_costs (struct ivopts_data *data)
5198 {
5199   unsigned j, n;
5200   gimple phi;
5201   gimple_stmt_iterator psi;
5202   tree op;
5203   struct loop *loop = data->current_loop;
5204   bitmap_iterator bi;
5205
5206   if (dump_file && (dump_flags & TDF_DETAILS))
5207     {
5208       fprintf (dump_file, "Global costs:\n");
5209       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5210       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5211       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5212       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5213     }
5214
5215   n = 0;
5216   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5217     {
5218       phi = gsi_stmt (psi);
5219       op = PHI_RESULT (phi);
5220
5221       if (virtual_operand_p (op))
5222         continue;
5223
5224       if (get_iv (data, op))
5225         continue;
5226
5227       n++;
5228     }
5229
5230   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5231     {
5232       struct version_info *info = ver_info (data, j);
5233
5234       if (info->inv_id && info->has_nonlin_use)
5235         n++;
5236     }
5237
5238   data->regs_used = n;
5239   if (dump_file && (dump_flags & TDF_DETAILS))
5240     fprintf (dump_file, "  regs_used %d\n", n);
5241
5242   if (dump_file && (dump_flags & TDF_DETAILS))
5243     {
5244       fprintf (dump_file, "  cost for size:\n");
5245       fprintf (dump_file, "  ivs\tcost\n");
5246       for (j = 0; j <= 2 * target_avail_regs; j++)
5247         fprintf (dump_file, "  %d\t%d\n", j,
5248                  ivopts_global_cost_for_size (data, j));
5249       fprintf (dump_file, "\n");
5250     }
5251 }
5252
5253 /* Returns true if A is a cheaper cost pair than B.  */
5254
5255 static bool
5256 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5257 {
5258   int cmp;
5259
5260   if (!a)
5261     return false;
5262
5263   if (!b)
5264     return true;
5265
5266   cmp = compare_costs (a->cost, b->cost);
5267   if (cmp < 0)
5268     return true;
5269
5270   if (cmp > 0)
5271     return false;
5272
5273   /* In case the costs are the same, prefer the cheaper candidate.  */
5274   if (a->cand->cost < b->cand->cost)
5275     return true;
5276
5277   return false;
5278 }
5279
5280
5281 /* Returns candidate by that USE is expressed in IVS.  */
5282
5283 static struct cost_pair *
5284 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5285 {
5286   return ivs->cand_for_use[use->id];
5287 }
5288
5289 /* Computes the cost field of IVS structure.  */
5290
5291 static void
5292 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5293 {
5294   comp_cost cost = ivs->cand_use_cost;
5295
5296   cost.cost += ivs->cand_cost;
5297
5298   cost.cost += ivopts_global_cost_for_size (data,
5299                                             ivs->n_regs + ivs->num_used_inv_expr);
5300
5301   ivs->cost = cost;
5302 }
5303
5304 /* Remove invariants in set INVS to set IVS.  */
5305
5306 static void
5307 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5308 {
5309   bitmap_iterator bi;
5310   unsigned iid;
5311
5312   if (!invs)
5313     return;
5314
5315   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5316     {
5317       ivs->n_invariant_uses[iid]--;
5318       if (ivs->n_invariant_uses[iid] == 0)
5319         ivs->n_regs--;
5320     }
5321 }
5322
5323 /* Set USE not to be expressed by any candidate in IVS.  */
5324
5325 static void
5326 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5327                  struct iv_use *use)
5328 {
5329   unsigned uid = use->id, cid;
5330   struct cost_pair *cp;
5331
5332   cp = ivs->cand_for_use[uid];
5333   if (!cp)
5334     return;
5335   cid = cp->cand->id;
5336
5337   ivs->bad_uses++;
5338   ivs->cand_for_use[uid] = NULL;
5339   ivs->n_cand_uses[cid]--;
5340
5341   if (ivs->n_cand_uses[cid] == 0)
5342     {
5343       bitmap_clear_bit (ivs->cands, cid);
5344       /* Do not count the pseudocandidates.  */
5345       if (cp->cand->iv)
5346         ivs->n_regs--;
5347       ivs->n_cands--;
5348       ivs->cand_cost -= cp->cand->cost;
5349
5350       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5351     }
5352
5353   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5354
5355   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5356
5357   if (cp->inv_expr_id != -1)
5358     {
5359       ivs->used_inv_expr[cp->inv_expr_id]--;
5360       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5361         ivs->num_used_inv_expr--;
5362     }
5363   iv_ca_recount_cost (data, ivs);
5364 }
5365
5366 /* Add invariants in set INVS to set IVS.  */
5367
5368 static void
5369 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5370 {
5371   bitmap_iterator bi;
5372   unsigned iid;
5373
5374   if (!invs)
5375     return;
5376
5377   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5378     {
5379       ivs->n_invariant_uses[iid]++;
5380       if (ivs->n_invariant_uses[iid] == 1)
5381         ivs->n_regs++;
5382     }
5383 }
5384
5385 /* Set cost pair for USE in set IVS to CP.  */
5386
5387 static void
5388 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5389               struct iv_use *use, struct cost_pair *cp)
5390 {
5391   unsigned uid = use->id, cid;
5392
5393   if (ivs->cand_for_use[uid] == cp)
5394     return;
5395
5396   if (ivs->cand_for_use[uid])
5397     iv_ca_set_no_cp (data, ivs, use);
5398
5399   if (cp)
5400     {
5401       cid = cp->cand->id;
5402
5403       ivs->bad_uses--;
5404       ivs->cand_for_use[uid] = cp;
5405       ivs->n_cand_uses[cid]++;
5406       if (ivs->n_cand_uses[cid] == 1)
5407         {
5408           bitmap_set_bit (ivs->cands, cid);
5409           /* Do not count the pseudocandidates.  */
5410           if (cp->cand->iv)
5411             ivs->n_regs++;
5412           ivs->n_cands++;
5413           ivs->cand_cost += cp->cand->cost;
5414
5415           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5416         }
5417
5418       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5419       iv_ca_set_add_invariants (ivs, cp->depends_on);
5420
5421       if (cp->inv_expr_id != -1)
5422         {
5423           ivs->used_inv_expr[cp->inv_expr_id]++;
5424           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5425             ivs->num_used_inv_expr++;
5426         }
5427       iv_ca_recount_cost (data, ivs);
5428     }
5429 }
5430
5431 /* Extend set IVS by expressing USE by some of the candidates in it
5432    if possible. All important candidates will be considered
5433    if IMPORTANT_CANDIDATES is true.  */
5434
5435 static void
5436 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5437                struct iv_use *use, bool important_candidates)
5438 {
5439   struct cost_pair *best_cp = NULL, *cp;
5440   bitmap_iterator bi;
5441   bitmap cands;
5442   unsigned i;
5443
5444   gcc_assert (ivs->upto >= use->id);
5445
5446   if (ivs->upto == use->id)
5447     {
5448       ivs->upto++;
5449       ivs->bad_uses++;
5450     }
5451
5452   cands = (important_candidates ? data->important_candidates : ivs->cands);
5453   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5454     {
5455       struct iv_cand *cand = iv_cand (data, i);
5456
5457       cp = get_use_iv_cost (data, use, cand);
5458
5459       if (cheaper_cost_pair (cp, best_cp))
5460         best_cp = cp;
5461     }
5462
5463   iv_ca_set_cp (data, ivs, use, best_cp);
5464 }
5465
5466 /* Get cost for assignment IVS.  */
5467
5468 static comp_cost
5469 iv_ca_cost (struct iv_ca *ivs)
5470 {
5471   /* This was a conditional expression but it triggered a bug in
5472      Sun C 5.5.  */
5473   if (ivs->bad_uses)
5474     return infinite_cost;
5475   else
5476     return ivs->cost;
5477 }
5478
5479 /* Returns true if all dependences of CP are among invariants in IVS.  */
5480
5481 static bool
5482 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5483 {
5484   unsigned i;
5485   bitmap_iterator bi;
5486
5487   if (!cp->depends_on)
5488     return true;
5489
5490   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5491     {
5492       if (ivs->n_invariant_uses[i] == 0)
5493         return false;
5494     }
5495
5496   return true;
5497 }
5498
5499 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5500    it before NEXT_CHANGE.  */
5501
5502 static struct iv_ca_delta *
5503 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5504                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5505 {
5506   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5507
5508   change->use = use;
5509   change->old_cp = old_cp;
5510   change->new_cp = new_cp;
5511   change->next_change = next_change;
5512
5513   return change;
5514 }
5515
5516 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5517    are rewritten.  */
5518
5519 static struct iv_ca_delta *
5520 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5521 {
5522   struct iv_ca_delta *last;
5523
5524   if (!l2)
5525     return l1;
5526
5527   if (!l1)
5528     return l2;
5529
5530   for (last = l1; last->next_change; last = last->next_change)
5531     continue;
5532   last->next_change = l2;
5533
5534   return l1;
5535 }
5536
5537 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5538
5539 static struct iv_ca_delta *
5540 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5541 {
5542   struct iv_ca_delta *act, *next, *prev = NULL;
5543   struct cost_pair *tmp;
5544
5545   for (act = delta; act; act = next)
5546     {
5547       next = act->next_change;
5548       act->next_change = prev;
5549       prev = act;
5550
5551       tmp = act->old_cp;
5552       act->old_cp = act->new_cp;
5553       act->new_cp = tmp;
5554     }
5555
5556   return prev;
5557 }
5558
5559 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5560    reverted instead.  */
5561
5562 static void
5563 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5564                     struct iv_ca_delta *delta, bool forward)
5565 {
5566   struct cost_pair *from, *to;
5567   struct iv_ca_delta *act;
5568
5569   if (!forward)
5570     delta = iv_ca_delta_reverse (delta);
5571
5572   for (act = delta; act; act = act->next_change)
5573     {
5574       from = act->old_cp;
5575       to = act->new_cp;
5576       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5577       iv_ca_set_cp (data, ivs, act->use, to);
5578     }
5579
5580   if (!forward)
5581     iv_ca_delta_reverse (delta);
5582 }
5583
5584 /* Returns true if CAND is used in IVS.  */
5585
5586 static bool
5587 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5588 {
5589   return ivs->n_cand_uses[cand->id] > 0;
5590 }
5591
5592 /* Returns number of induction variable candidates in the set IVS.  */
5593
5594 static unsigned
5595 iv_ca_n_cands (struct iv_ca *ivs)
5596 {
5597   return ivs->n_cands;
5598 }
5599
5600 /* Free the list of changes DELTA.  */
5601
5602 static void
5603 iv_ca_delta_free (struct iv_ca_delta **delta)
5604 {
5605   struct iv_ca_delta *act, *next;
5606
5607   for (act = *delta; act; act = next)
5608     {
5609       next = act->next_change;
5610       free (act);
5611     }
5612
5613   *delta = NULL;
5614 }
5615
5616 /* Allocates new iv candidates assignment.  */
5617
5618 static struct iv_ca *
5619 iv_ca_new (struct ivopts_data *data)
5620 {
5621   struct iv_ca *nw = XNEW (struct iv_ca);
5622
5623   nw->upto = 0;
5624   nw->bad_uses = 0;
5625   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5626   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5627   nw->cands = BITMAP_ALLOC (NULL);
5628   nw->n_cands = 0;
5629   nw->n_regs = 0;
5630   nw->cand_use_cost = no_cost;
5631   nw->cand_cost = 0;
5632   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5633   nw->cost = no_cost;
5634   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5635   nw->num_used_inv_expr = 0;
5636
5637   return nw;
5638 }
5639
5640 /* Free memory occupied by the set IVS.  */
5641
5642 static void
5643 iv_ca_free (struct iv_ca **ivs)
5644 {
5645   free ((*ivs)->cand_for_use);
5646   free ((*ivs)->n_cand_uses);
5647   BITMAP_FREE ((*ivs)->cands);
5648   free ((*ivs)->n_invariant_uses);
5649   free ((*ivs)->used_inv_expr);
5650   free (*ivs);
5651   *ivs = NULL;
5652 }
5653
5654 /* Dumps IVS to FILE.  */
5655
5656 static void
5657 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5658 {
5659   const char *pref = "  invariants ";
5660   unsigned i;
5661   comp_cost cost = iv_ca_cost (ivs);
5662
5663   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5664   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5665            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5666   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5667
5668    for (i = 0; i < ivs->upto; i++)
5669     {
5670       struct iv_use *use = iv_use (data, i);
5671       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5672       if (cp)
5673         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5674                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5675       else
5676         fprintf (file, "   use:%d --> ??\n", use->id);
5677     }
5678
5679   for (i = 1; i <= data->max_inv_id; i++)
5680     if (ivs->n_invariant_uses[i])
5681       {
5682         fprintf (file, "%s%d", pref, i);
5683         pref = ", ";
5684       }
5685   fprintf (file, "\n\n");
5686 }
5687
5688 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5689    new set, and store differences in DELTA.  Number of induction variables
5690    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5691    the function will try to find a solution with mimimal iv candidates.  */
5692
5693 static comp_cost
5694 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5695               struct iv_cand *cand, struct iv_ca_delta **delta,
5696               unsigned *n_ivs, bool min_ncand)
5697 {
5698   unsigned i;
5699   comp_cost cost;
5700   struct iv_use *use;
5701   struct cost_pair *old_cp, *new_cp;
5702
5703   *delta = NULL;
5704   for (i = 0; i < ivs->upto; i++)
5705     {
5706       use = iv_use (data, i);
5707       old_cp = iv_ca_cand_for_use (ivs, use);
5708
5709       if (old_cp
5710           && old_cp->cand == cand)
5711         continue;
5712
5713       new_cp = get_use_iv_cost (data, use, cand);
5714       if (!new_cp)
5715         continue;
5716
5717       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5718         continue;
5719
5720       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5721         continue;
5722
5723       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5724     }
5725
5726   iv_ca_delta_commit (data, ivs, *delta, true);
5727   cost = iv_ca_cost (ivs);
5728   if (n_ivs)
5729     *n_ivs = iv_ca_n_cands (ivs);
5730   iv_ca_delta_commit (data, ivs, *delta, false);
5731
5732   return cost;
5733 }
5734
5735 /* Try narrowing set IVS by removing CAND.  Return the cost of
5736    the new set and store the differences in DELTA.  */
5737
5738 static comp_cost
5739 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5740               struct iv_cand *cand, struct iv_ca_delta **delta)
5741 {
5742   unsigned i, ci;
5743   struct iv_use *use;
5744   struct cost_pair *old_cp, *new_cp, *cp;
5745   bitmap_iterator bi;
5746   struct iv_cand *cnd;
5747   comp_cost cost;
5748
5749   *delta = NULL;
5750   for (i = 0; i < n_iv_uses (data); i++)
5751     {
5752       use = iv_use (data, i);
5753
5754       old_cp = iv_ca_cand_for_use (ivs, use);
5755       if (old_cp->cand != cand)
5756         continue;
5757
5758       new_cp = NULL;
5759
5760       if (data->consider_all_candidates)
5761         {
5762           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5763             {
5764               if (ci == cand->id)
5765                 continue;
5766
5767               cnd = iv_cand (data, ci);
5768
5769               cp = get_use_iv_cost (data, use, cnd);
5770               if (!cp)
5771                 continue;
5772
5773               if (!iv_ca_has_deps (ivs, cp))
5774                 continue;
5775
5776               if (!cheaper_cost_pair (cp, new_cp))
5777                 continue;
5778
5779               new_cp = cp;
5780             }
5781         }
5782       else
5783         {
5784           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5785             {
5786               if (ci == cand->id)
5787                 continue;
5788
5789               cnd = iv_cand (data, ci);
5790
5791               cp = get_use_iv_cost (data, use, cnd);
5792               if (!cp)
5793                 continue;
5794               if (!iv_ca_has_deps (ivs, cp))
5795                 continue;
5796
5797               if (!cheaper_cost_pair (cp, new_cp))
5798                 continue;
5799
5800               new_cp = cp;
5801             }
5802         }
5803
5804       if (!new_cp)
5805         {
5806           iv_ca_delta_free (delta);
5807           return infinite_cost;
5808         }
5809
5810       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5811     }
5812
5813   iv_ca_delta_commit (data, ivs, *delta, true);
5814   cost = iv_ca_cost (ivs);
5815   iv_ca_delta_commit (data, ivs, *delta, false);
5816
5817   return cost;
5818 }
5819
5820 /* Try optimizing the set of candidates IVS by removing candidates different
5821    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5822    differences in DELTA.  */
5823
5824 static comp_cost
5825 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5826              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5827 {
5828   bitmap_iterator bi;
5829   struct iv_ca_delta *act_delta, *best_delta;
5830   unsigned i;
5831   comp_cost best_cost, acost;
5832   struct iv_cand *cand;
5833
5834   best_delta = NULL;
5835   best_cost = iv_ca_cost (ivs);
5836
5837   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5838     {
5839       cand = iv_cand (data, i);
5840
5841       if (cand == except_cand)
5842         continue;
5843
5844       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5845
5846       if (compare_costs (acost, best_cost) < 0)
5847         {
5848           best_cost = acost;
5849           iv_ca_delta_free (&best_delta);
5850           best_delta = act_delta;
5851         }
5852       else
5853         iv_ca_delta_free (&act_delta);
5854     }
5855
5856   if (!best_delta)
5857     {
5858       *delta = NULL;
5859       return best_cost;
5860     }
5861
5862   /* Recurse to possibly remove other unnecessary ivs.  */
5863   iv_ca_delta_commit (data, ivs, best_delta, true);
5864   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5865   iv_ca_delta_commit (data, ivs, best_delta, false);
5866   *delta = iv_ca_delta_join (best_delta, *delta);
5867   return best_cost;
5868 }
5869
5870 /* Tries to extend the sets IVS in the best possible way in order
5871    to express the USE.  If ORIGINALP is true, prefer candidates from
5872    the original set of IVs, otherwise favor important candidates not
5873    based on any memory object.  */
5874
5875 static bool
5876 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5877                   struct iv_use *use, bool originalp)
5878 {
5879   comp_cost best_cost, act_cost;
5880   unsigned i;
5881   bitmap_iterator bi;
5882   struct iv_cand *cand;
5883   struct iv_ca_delta *best_delta = NULL, *act_delta;
5884   struct cost_pair *cp;
5885
5886   iv_ca_add_use (data, ivs, use, false);
5887   best_cost = iv_ca_cost (ivs);
5888
5889   cp = iv_ca_cand_for_use (ivs, use);
5890   if (!cp)
5891     {
5892       ivs->upto--;
5893       ivs->bad_uses--;
5894       iv_ca_add_use (data, ivs, use, true);
5895       best_cost = iv_ca_cost (ivs);
5896       cp = iv_ca_cand_for_use (ivs, use);
5897     }
5898   if (cp)
5899     {
5900       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5901       iv_ca_set_no_cp (data, ivs, use);
5902     }
5903
5904   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5905      first try important candidates not based on any memory object.  Only if
5906      this fails, try the specific ones.  Rationale -- in loops with many
5907      variables the best choice often is to use just one generic biv.  If we
5908      added here many ivs specific to the uses, the optimization algorithm later
5909      would be likely to get stuck in a local minimum, thus causing us to create
5910      too many ivs.  The approach from few ivs to more seems more likely to be
5911      successful -- starting from few ivs, replacing an expensive use by a
5912      specific iv should always be a win.  */
5913   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5914     {
5915       cand = iv_cand (data, i);
5916
5917       if (originalp && cand->pos !=IP_ORIGINAL)
5918         continue;
5919
5920       if (!originalp && cand->iv->base_object != NULL_TREE)
5921         continue;
5922
5923       if (iv_ca_cand_used_p (ivs, cand))
5924         continue;
5925
5926       cp = get_use_iv_cost (data, use, cand);
5927       if (!cp)
5928         continue;
5929
5930       iv_ca_set_cp (data, ivs, use, cp);
5931       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5932                                true);
5933       iv_ca_set_no_cp (data, ivs, use);
5934       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5935
5936       if (compare_costs (act_cost, best_cost) < 0)
5937         {
5938           best_cost = act_cost;
5939
5940           iv_ca_delta_free (&best_delta);
5941           best_delta = act_delta;
5942         }
5943       else
5944         iv_ca_delta_free (&act_delta);
5945     }
5946
5947   if (infinite_cost_p (best_cost))
5948     {
5949       for (i = 0; i < use->n_map_members; i++)
5950         {
5951           cp = use->cost_map + i;
5952           cand = cp->cand;
5953           if (!cand)
5954             continue;
5955
5956           /* Already tried this.  */
5957           if (cand->important)
5958             {
5959               if (originalp && cand->pos == IP_ORIGINAL)
5960                 continue;
5961               if (!originalp && cand->iv->base_object == NULL_TREE)
5962                 continue;
5963             }
5964
5965           if (iv_ca_cand_used_p (ivs, cand))
5966             continue;
5967
5968           act_delta = NULL;
5969           iv_ca_set_cp (data, ivs, use, cp);
5970           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5971           iv_ca_set_no_cp (data, ivs, use);
5972           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5973                                        cp, act_delta);
5974
5975           if (compare_costs (act_cost, best_cost) < 0)
5976             {
5977               best_cost = act_cost;
5978
5979               if (best_delta)
5980                 iv_ca_delta_free (&best_delta);
5981               best_delta = act_delta;
5982             }
5983           else
5984             iv_ca_delta_free (&act_delta);
5985         }
5986     }
5987
5988   iv_ca_delta_commit (data, ivs, best_delta, true);
5989   iv_ca_delta_free (&best_delta);
5990
5991   return !infinite_cost_p (best_cost);
5992 }
5993
5994 /* Finds an initial assignment of candidates to uses.  */
5995
5996 static struct iv_ca *
5997 get_initial_solution (struct ivopts_data *data, bool originalp)
5998 {
5999   struct iv_ca *ivs = iv_ca_new (data);
6000   unsigned i;
6001
6002   for (i = 0; i < n_iv_uses (data); i++)
6003     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6004       {
6005         iv_ca_free (&ivs);
6006         return NULL;
6007       }
6008
6009   return ivs;
6010 }
6011
6012 /* Tries to improve set of induction variables IVS.  */
6013
6014 static bool
6015 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6016 {
6017   unsigned i, n_ivs;
6018   comp_cost acost, best_cost = iv_ca_cost (ivs);
6019   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6020   struct iv_cand *cand;
6021
6022   /* Try extending the set of induction variables by one.  */
6023   for (i = 0; i < n_iv_cands (data); i++)
6024     {
6025       cand = iv_cand (data, i);
6026
6027       if (iv_ca_cand_used_p (ivs, cand))
6028         continue;
6029
6030       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6031       if (!act_delta)
6032         continue;
6033
6034       /* If we successfully added the candidate and the set is small enough,
6035          try optimizing it by removing other candidates.  */
6036       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6037         {
6038           iv_ca_delta_commit (data, ivs, act_delta, true);
6039           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6040           iv_ca_delta_commit (data, ivs, act_delta, false);
6041           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6042         }
6043
6044       if (compare_costs (acost, best_cost) < 0)
6045         {
6046           best_cost = acost;
6047           iv_ca_delta_free (&best_delta);
6048           best_delta = act_delta;
6049         }
6050       else
6051         iv_ca_delta_free (&act_delta);
6052     }
6053
6054   if (!best_delta)
6055     {
6056       /* Try removing the candidates from the set instead.  */
6057       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6058
6059       /* Nothing more we can do.  */
6060       if (!best_delta)
6061         return false;
6062     }
6063
6064   iv_ca_delta_commit (data, ivs, best_delta, true);
6065   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6066   iv_ca_delta_free (&best_delta);
6067   return true;
6068 }
6069
6070 /* Attempts to find the optimal set of induction variables.  We do simple
6071    greedy heuristic -- we try to replace at most one candidate in the selected
6072    solution and remove the unused ivs while this improves the cost.  */
6073
6074 static struct iv_ca *
6075 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6076 {
6077   struct iv_ca *set;
6078
6079   /* Get the initial solution.  */
6080   set = get_initial_solution (data, originalp);
6081   if (!set)
6082     {
6083       if (dump_file && (dump_flags & TDF_DETAILS))
6084         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6085       return NULL;
6086     }
6087
6088   if (dump_file && (dump_flags & TDF_DETAILS))
6089     {
6090       fprintf (dump_file, "Initial set of candidates:\n");
6091       iv_ca_dump (data, dump_file, set);
6092     }
6093
6094   while (try_improve_iv_set (data, set))
6095     {
6096       if (dump_file && (dump_flags & TDF_DETAILS))
6097         {
6098           fprintf (dump_file, "Improved to:\n");
6099           iv_ca_dump (data, dump_file, set);
6100         }
6101     }
6102
6103   return set;
6104 }
6105
6106 static struct iv_ca *
6107 find_optimal_iv_set (struct ivopts_data *data)
6108 {
6109   unsigned i;
6110   struct iv_ca *set, *origset;
6111   struct iv_use *use;
6112   comp_cost cost, origcost;
6113
6114   /* Determine the cost based on a strategy that starts with original IVs,
6115      and try again using a strategy that prefers candidates not based
6116      on any IVs.  */
6117   origset = find_optimal_iv_set_1 (data, true);
6118   set = find_optimal_iv_set_1 (data, false);
6119
6120   if (!origset && !set)
6121     return NULL;
6122
6123   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6124   cost = set ? iv_ca_cost (set) : infinite_cost;
6125
6126   if (dump_file && (dump_flags & TDF_DETAILS))
6127     {
6128       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6129                origcost.cost, origcost.complexity);
6130       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6131                cost.cost, cost.complexity);
6132     }
6133
6134   /* Choose the one with the best cost.  */
6135   if (compare_costs (origcost, cost) <= 0)
6136     {
6137       if (set)
6138         iv_ca_free (&set);
6139       set = origset;
6140     }
6141   else if (origset)
6142     iv_ca_free (&origset);
6143
6144   for (i = 0; i < n_iv_uses (data); i++)
6145     {
6146       use = iv_use (data, i);
6147       use->selected = iv_ca_cand_for_use (set, use)->cand;
6148     }
6149
6150   return set;
6151 }
6152
6153 /* Creates a new induction variable corresponding to CAND.  */
6154
6155 static void
6156 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6157 {
6158   gimple_stmt_iterator incr_pos;
6159   tree base;
6160   bool after = false;
6161
6162   if (!cand->iv)
6163     return;
6164
6165   switch (cand->pos)
6166     {
6167     case IP_NORMAL:
6168       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6169       break;
6170
6171     case IP_END:
6172       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6173       after = true;
6174       break;
6175
6176     case IP_AFTER_USE:
6177       after = true;
6178       /* fall through */
6179     case IP_BEFORE_USE:
6180       incr_pos = gsi_for_stmt (cand->incremented_at);
6181       break;
6182
6183     case IP_ORIGINAL:
6184       /* Mark that the iv is preserved.  */
6185       name_info (data, cand->var_before)->preserve_biv = true;
6186       name_info (data, cand->var_after)->preserve_biv = true;
6187
6188       /* Rewrite the increment so that it uses var_before directly.  */
6189       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6190       return;
6191     }
6192
6193   gimple_add_tmp_var (cand->var_before);
6194
6195   base = unshare_expr (cand->iv->base);
6196
6197   create_iv (base, unshare_expr (cand->iv->step),
6198              cand->var_before, data->current_loop,
6199              &incr_pos, after, &cand->var_before, &cand->var_after);
6200 }
6201
6202 /* Creates new induction variables described in SET.  */
6203
6204 static void
6205 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6206 {
6207   unsigned i;
6208   struct iv_cand *cand;
6209   bitmap_iterator bi;
6210
6211   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6212     {
6213       cand = iv_cand (data, i);
6214       create_new_iv (data, cand);
6215     }
6216
6217   if (dump_file && (dump_flags & TDF_DETAILS))
6218     {
6219       fprintf (dump_file, "\nSelected IV set: \n");
6220       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6221         {
6222           cand = iv_cand (data, i);
6223           dump_cand (dump_file, cand);
6224         }
6225       fprintf (dump_file, "\n");
6226     }
6227 }
6228
6229 /* Rewrites USE (definition of iv used in a nonlinear expression)
6230    using candidate CAND.  */
6231
6232 static void
6233 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6234                             struct iv_use *use, struct iv_cand *cand)
6235 {
6236   tree comp;
6237   tree op, tgt;
6238   gimple ass;
6239   gimple_stmt_iterator bsi;
6240
6241   /* An important special case -- if we are asked to express value of
6242      the original iv by itself, just exit; there is no need to
6243      introduce a new computation (that might also need casting the
6244      variable to unsigned and back).  */
6245   if (cand->pos == IP_ORIGINAL
6246       && cand->incremented_at == use->stmt)
6247     {
6248       enum tree_code stmt_code;
6249
6250       gcc_assert (is_gimple_assign (use->stmt));
6251       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6252
6253       /* Check whether we may leave the computation unchanged.
6254          This is the case only if it does not rely on other
6255          computations in the loop -- otherwise, the computation
6256          we rely upon may be removed in remove_unused_ivs,
6257          thus leading to ICE.  */
6258       stmt_code = gimple_assign_rhs_code (use->stmt);
6259       if (stmt_code == PLUS_EXPR
6260           || stmt_code == MINUS_EXPR
6261           || stmt_code == POINTER_PLUS_EXPR)
6262         {
6263           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6264             op = gimple_assign_rhs2 (use->stmt);
6265           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6266             op = gimple_assign_rhs1 (use->stmt);
6267           else
6268             op = NULL_TREE;
6269         }
6270       else
6271         op = NULL_TREE;
6272
6273       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6274         return;
6275     }
6276
6277   comp = get_computation (data->current_loop, use, cand);
6278   gcc_assert (comp != NULL_TREE);
6279
6280   switch (gimple_code (use->stmt))
6281     {
6282     case GIMPLE_PHI:
6283       tgt = PHI_RESULT (use->stmt);
6284
6285       /* If we should keep the biv, do not replace it.  */
6286       if (name_info (data, tgt)->preserve_biv)
6287         return;
6288
6289       bsi = gsi_after_labels (gimple_bb (use->stmt));
6290       break;
6291
6292     case GIMPLE_ASSIGN:
6293       tgt = gimple_assign_lhs (use->stmt);
6294       bsi = gsi_for_stmt (use->stmt);
6295       break;
6296
6297     default:
6298       gcc_unreachable ();
6299     }
6300
6301   if (!valid_gimple_rhs_p (comp)
6302       || (gimple_code (use->stmt) != GIMPLE_PHI
6303           /* We can't allow re-allocating the stmt as it might be pointed
6304              to still.  */
6305           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6306               >= gimple_num_ops (gsi_stmt (bsi)))))
6307     {
6308       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6309                                        true, GSI_SAME_STMT);
6310       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6311         {
6312           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6313           /* As this isn't a plain copy we have to reset alignment
6314              information.  */
6315           if (SSA_NAME_PTR_INFO (comp))
6316             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6317         }
6318     }
6319
6320   if (gimple_code (use->stmt) == GIMPLE_PHI)
6321     {
6322       ass = gimple_build_assign (tgt, comp);
6323       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6324
6325       bsi = gsi_for_stmt (use->stmt);
6326       remove_phi_node (&bsi, false);
6327     }
6328   else
6329     {
6330       gimple_assign_set_rhs_from_tree (&bsi, comp);
6331       use->stmt = gsi_stmt (bsi);
6332     }
6333 }
6334
6335 /* Performs a peephole optimization to reorder the iv update statement with
6336    a mem ref to enable instruction combining in later phases. The mem ref uses
6337    the iv value before the update, so the reordering transformation requires
6338    adjustment of the offset. CAND is the selected IV_CAND.
6339
6340    Example:
6341
6342    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6343    iv2 = iv1 + 1;
6344
6345    if (t < val)      (1)
6346      goto L;
6347    goto Head;
6348
6349
6350    directly propagating t over to (1) will introduce overlapping live range
6351    thus increase register pressure. This peephole transform it into:
6352
6353
6354    iv2 = iv1 + 1;
6355    t = MEM_REF (base, iv2, 8, 8);
6356    if (t < val)
6357      goto L;
6358    goto Head;
6359 */
6360
6361 static void
6362 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6363 {
6364   tree var_after;
6365   gimple iv_update, stmt;
6366   basic_block bb;
6367   gimple_stmt_iterator gsi, gsi_iv;
6368
6369   if (cand->pos != IP_NORMAL)
6370     return;
6371
6372   var_after = cand->var_after;
6373   iv_update = SSA_NAME_DEF_STMT (var_after);
6374
6375   bb = gimple_bb (iv_update);
6376   gsi = gsi_last_nondebug_bb (bb);
6377   stmt = gsi_stmt (gsi);
6378
6379   /* Only handle conditional statement for now.  */
6380   if (gimple_code (stmt) != GIMPLE_COND)
6381     return;
6382
6383   gsi_prev_nondebug (&gsi);
6384   stmt = gsi_stmt (gsi);
6385   if (stmt != iv_update)
6386     return;
6387
6388   gsi_prev_nondebug (&gsi);
6389   if (gsi_end_p (gsi))
6390     return;
6391
6392   stmt = gsi_stmt (gsi);
6393   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6394     return;
6395
6396   if (stmt != use->stmt)
6397     return;
6398
6399   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6400     return;
6401
6402   if (dump_file && (dump_flags & TDF_DETAILS))
6403     {
6404       fprintf (dump_file, "Reordering \n");
6405       print_gimple_stmt (dump_file, iv_update, 0, 0);
6406       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6407       fprintf (dump_file, "\n");
6408     }
6409
6410   gsi = gsi_for_stmt (use->stmt);
6411   gsi_iv = gsi_for_stmt (iv_update);
6412   gsi_move_before (&gsi_iv, &gsi);
6413
6414   cand->pos = IP_BEFORE_USE;
6415   cand->incremented_at = use->stmt;
6416 }
6417
6418 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6419
6420 static void
6421 rewrite_use_address (struct ivopts_data *data,
6422                      struct iv_use *use, struct iv_cand *cand)
6423 {
6424   aff_tree aff;
6425   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6426   tree base_hint = NULL_TREE;
6427   tree ref, iv;
6428   bool ok;
6429
6430   adjust_iv_update_pos (cand, use);
6431   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6432   gcc_assert (ok);
6433   unshare_aff_combination (&aff);
6434
6435   /* To avoid undefined overflow problems, all IV candidates use unsigned
6436      integer types.  The drawback is that this makes it impossible for
6437      create_mem_ref to distinguish an IV that is based on a memory object
6438      from one that represents simply an offset.
6439
6440      To work around this problem, we pass a hint to create_mem_ref that
6441      indicates which variable (if any) in aff is an IV based on a memory
6442      object.  Note that we only consider the candidate.  If this is not
6443      based on an object, the base of the reference is in some subexpression
6444      of the use -- but these will use pointer types, so they are recognized
6445      by the create_mem_ref heuristics anyway.  */
6446   if (cand->iv->base_object)
6447     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6448
6449   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6450   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6451                         reference_alias_ptr_type (*use->op_p),
6452                         iv, base_hint, data->speed);
6453   copy_ref_info (ref, *use->op_p);
6454   *use->op_p = ref;
6455 }
6456
6457 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6458    candidate CAND.  */
6459
6460 static void
6461 rewrite_use_compare (struct ivopts_data *data,
6462                      struct iv_use *use, struct iv_cand *cand)
6463 {
6464   tree comp, *var_p, op, bound;
6465   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6466   enum tree_code compare;
6467   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6468   bool ok;
6469
6470   bound = cp->value;
6471   if (bound)
6472     {
6473       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6474       tree var_type = TREE_TYPE (var);
6475       gimple_seq stmts;
6476
6477       if (dump_file && (dump_flags & TDF_DETAILS))
6478         {
6479           fprintf (dump_file, "Replacing exit test: ");
6480           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6481         }
6482       compare = cp->comp;
6483       bound = unshare_expr (fold_convert (var_type, bound));
6484       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6485       if (stmts)
6486         gsi_insert_seq_on_edge_immediate (
6487                 loop_preheader_edge (data->current_loop),
6488                 stmts);
6489
6490       gimple_cond_set_lhs (use->stmt, var);
6491       gimple_cond_set_code (use->stmt, compare);
6492       gimple_cond_set_rhs (use->stmt, op);
6493       return;
6494     }
6495
6496   /* The induction variable elimination failed; just express the original
6497      giv.  */
6498   comp = get_computation (data->current_loop, use, cand);
6499   gcc_assert (comp != NULL_TREE);
6500
6501   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6502   gcc_assert (ok);
6503
6504   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6505                                      true, GSI_SAME_STMT);
6506 }
6507
6508 /* Rewrites USE using candidate CAND.  */
6509
6510 static void
6511 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6512 {
6513   switch (use->type)
6514     {
6515       case USE_NONLINEAR_EXPR:
6516         rewrite_use_nonlinear_expr (data, use, cand);
6517         break;
6518
6519       case USE_ADDRESS:
6520         rewrite_use_address (data, use, cand);
6521         break;
6522
6523       case USE_COMPARE:
6524         rewrite_use_compare (data, use, cand);
6525         break;
6526
6527       default:
6528         gcc_unreachable ();
6529     }
6530
6531   update_stmt (use->stmt);
6532 }
6533
6534 /* Rewrite the uses using the selected induction variables.  */
6535
6536 static void
6537 rewrite_uses (struct ivopts_data *data)
6538 {
6539   unsigned i;
6540   struct iv_cand *cand;
6541   struct iv_use *use;
6542
6543   for (i = 0; i < n_iv_uses (data); i++)
6544     {
6545       use = iv_use (data, i);
6546       cand = use->selected;
6547       gcc_assert (cand);
6548
6549       rewrite_use (data, use, cand);
6550     }
6551 }
6552
6553 /* Removes the ivs that are not used after rewriting.  */
6554
6555 static void
6556 remove_unused_ivs (struct ivopts_data *data)
6557 {
6558   unsigned j;
6559   bitmap_iterator bi;
6560   bitmap toremove = BITMAP_ALLOC (NULL);
6561
6562   /* Figure out an order in which to release SSA DEFs so that we don't
6563      release something that we'd have to propagate into a debug stmt
6564      afterwards.  */
6565   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6566     {
6567       struct version_info *info;
6568
6569       info = ver_info (data, j);
6570       if (info->iv
6571           && !integer_zerop (info->iv->step)
6572           && !info->inv_id
6573           && !info->iv->have_use_for
6574           && !info->preserve_biv)
6575         {
6576           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6577
6578           tree def = info->iv->ssa_name;
6579
6580           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6581             {
6582               imm_use_iterator imm_iter;
6583               use_operand_p use_p;
6584               gimple stmt;
6585               int count = 0;
6586
6587               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6588                 {
6589                   if (!gimple_debug_bind_p (stmt))
6590                     continue;
6591
6592                   /* We just want to determine whether to do nothing
6593                      (count == 0), to substitute the computed
6594                      expression into a single use of the SSA DEF by
6595                      itself (count == 1), or to use a debug temp
6596                      because the SSA DEF is used multiple times or as
6597                      part of a larger expression (count > 1). */
6598                   count++;
6599                   if (gimple_debug_bind_get_value (stmt) != def)
6600                     count++;
6601
6602                   if (count > 1)
6603                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6604                 }
6605
6606               if (!count)
6607                 continue;
6608
6609               struct iv_use dummy_use;
6610               struct iv_cand *best_cand = NULL, *cand;
6611               unsigned i, best_pref = 0, cand_pref;
6612
6613               memset (&dummy_use, 0, sizeof (dummy_use));
6614               dummy_use.iv = info->iv;
6615               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6616                 {
6617                   cand = iv_use (data, i)->selected;
6618                   if (cand == best_cand)
6619                     continue;
6620                   cand_pref = operand_equal_p (cand->iv->step,
6621                                                info->iv->step, 0)
6622                     ? 4 : 0;
6623                   cand_pref
6624                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6625                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6626                     ? 2 : 0;
6627                   cand_pref
6628                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6629                     ? 1 : 0;
6630                   if (best_cand == NULL || best_pref < cand_pref)
6631                     {
6632                       best_cand = cand;
6633                       best_pref = cand_pref;
6634                     }
6635                 }
6636
6637               if (!best_cand)
6638                 continue;
6639
6640               tree comp = get_computation_at (data->current_loop,
6641                                               &dummy_use, best_cand,
6642                                               SSA_NAME_DEF_STMT (def));
6643               if (!comp)
6644                 continue;
6645
6646               if (count > 1)
6647                 {
6648                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6649                   DECL_ARTIFICIAL (vexpr) = 1;
6650                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6651                   if (SSA_NAME_VAR (def))
6652                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6653                   else
6654                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6655                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6656                   gimple_stmt_iterator gsi;
6657
6658                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6659                     gsi = gsi_after_labels (gimple_bb
6660                                             (SSA_NAME_DEF_STMT (def)));
6661                   else
6662                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6663
6664                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6665                   comp = vexpr;
6666                 }
6667
6668               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6669                 {
6670                   if (!gimple_debug_bind_p (stmt))
6671                     continue;
6672
6673                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6674                     SET_USE (use_p, comp);
6675
6676                   update_stmt (stmt);
6677                 }
6678             }
6679         }
6680     }
6681
6682   release_defs_bitset (toremove);
6683
6684   BITMAP_FREE (toremove);
6685 }
6686
6687 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6688    for pointer_map_traverse.  */
6689
6690 static bool
6691 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6692                       void *data ATTRIBUTE_UNUSED)
6693 {
6694   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6695
6696   free (niter);
6697   return true;
6698 }
6699
6700 /* Frees data allocated by the optimization of a single loop.  */
6701
6702 static void
6703 free_loop_data (struct ivopts_data *data)
6704 {
6705   unsigned i, j;
6706   bitmap_iterator bi;
6707   tree obj;
6708
6709   if (data->niters)
6710     {
6711       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6712       pointer_map_destroy (data->niters);
6713       data->niters = NULL;
6714     }
6715
6716   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6717     {
6718       struct version_info *info;
6719
6720       info = ver_info (data, i);
6721       free (info->iv);
6722       info->iv = NULL;
6723       info->has_nonlin_use = false;
6724       info->preserve_biv = false;
6725       info->inv_id = 0;
6726     }
6727   bitmap_clear (data->relevant);
6728   bitmap_clear (data->important_candidates);
6729
6730   for (i = 0; i < n_iv_uses (data); i++)
6731     {
6732       struct iv_use *use = iv_use (data, i);
6733
6734       free (use->iv);
6735       BITMAP_FREE (use->related_cands);
6736       for (j = 0; j < use->n_map_members; j++)
6737         if (use->cost_map[j].depends_on)
6738           BITMAP_FREE (use->cost_map[j].depends_on);
6739       free (use->cost_map);
6740       free (use);
6741     }
6742   data->iv_uses.truncate (0);
6743
6744   for (i = 0; i < n_iv_cands (data); i++)
6745     {
6746       struct iv_cand *cand = iv_cand (data, i);
6747
6748       free (cand->iv);
6749       if (cand->depends_on)
6750         BITMAP_FREE (cand->depends_on);
6751       free (cand);
6752     }
6753   data->iv_candidates.truncate (0);
6754
6755   if (data->version_info_size < num_ssa_names)
6756     {
6757       data->version_info_size = 2 * num_ssa_names;
6758       free (data->version_info);
6759       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6760     }
6761
6762   data->max_inv_id = 0;
6763
6764   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6765     SET_DECL_RTL (obj, NULL_RTX);
6766
6767   decl_rtl_to_reset.truncate (0);
6768
6769   data->inv_expr_tab.empty ();
6770   data->inv_expr_id = 0;
6771 }
6772
6773 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6774    loop tree.  */
6775
6776 static void
6777 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6778 {
6779   free_loop_data (data);
6780   free (data->version_info);
6781   BITMAP_FREE (data->relevant);
6782   BITMAP_FREE (data->important_candidates);
6783
6784   decl_rtl_to_reset.release ();
6785   data->iv_uses.release ();
6786   data->iv_candidates.release ();
6787   data->inv_expr_tab.dispose ();
6788 }
6789
6790 /* Returns true if the loop body BODY includes any function calls.  */
6791
6792 static bool
6793 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6794 {
6795   gimple_stmt_iterator gsi;
6796   unsigned i;
6797
6798   for (i = 0; i < num_nodes; i++)
6799     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6800       {
6801         gimple stmt = gsi_stmt (gsi);
6802         if (is_gimple_call (stmt)
6803             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6804           return true;
6805       }
6806   return false;
6807 }
6808
6809 /* Optimizes the LOOP.  Returns true if anything changed.  */
6810
6811 static bool
6812 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6813 {
6814   bool changed = false;
6815   struct iv_ca *iv_ca;
6816   edge exit = single_dom_exit (loop);
6817   basic_block *body;
6818
6819   gcc_assert (!data->niters);
6820   data->current_loop = loop;
6821   data->speed = optimize_loop_for_speed_p (loop);
6822
6823   if (dump_file && (dump_flags & TDF_DETAILS))
6824     {
6825       fprintf (dump_file, "Processing loop %d\n", loop->num);
6826
6827       if (exit)
6828         {
6829           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6830                    exit->src->index, exit->dest->index);
6831           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6832           fprintf (dump_file, "\n");
6833         }
6834
6835       fprintf (dump_file, "\n");
6836     }
6837
6838   body = get_loop_body (loop);
6839   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6840   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6841   free (body);
6842
6843   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6844
6845   /* For each ssa name determines whether it behaves as an induction variable
6846      in some loop.  */
6847   if (!find_induction_variables (data))
6848     goto finish;
6849
6850   /* Finds interesting uses (item 1).  */
6851   find_interesting_uses (data);
6852   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6853     goto finish;
6854
6855   /* Finds candidates for the induction variables (item 2).  */
6856   find_iv_candidates (data);
6857
6858   /* Calculates the costs (item 3, part 1).  */
6859   determine_iv_costs (data);
6860   determine_use_iv_costs (data);
6861   determine_set_costs (data);
6862
6863   /* Find the optimal set of induction variables (item 3, part 2).  */
6864   iv_ca = find_optimal_iv_set (data);
6865   if (!iv_ca)
6866     goto finish;
6867   changed = true;
6868
6869   /* Create the new induction variables (item 4, part 1).  */
6870   create_new_ivs (data, iv_ca);
6871   iv_ca_free (&iv_ca);
6872
6873   /* Rewrite the uses (item 4, part 2).  */
6874   rewrite_uses (data);
6875
6876   /* Remove the ivs that are unused after rewriting.  */
6877   remove_unused_ivs (data);
6878
6879   /* We have changed the structure of induction variables; it might happen
6880      that definitions in the scev database refer to some of them that were
6881      eliminated.  */
6882   scev_reset ();
6883
6884 finish:
6885   free_loop_data (data);
6886
6887   return changed;
6888 }
6889
6890 /* Main entry point.  Optimizes induction variables in loops.  */
6891
6892 void
6893 tree_ssa_iv_optimize (void)
6894 {
6895   struct loop *loop;
6896   struct ivopts_data data;
6897
6898   tree_ssa_iv_optimize_init (&data);
6899
6900   /* Optimize the loops starting with the innermost ones.  */
6901   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6902     {
6903       if (dump_file && (dump_flags & TDF_DETAILS))
6904         flow_loop_dump (loop, dump_file, NULL, 1);
6905
6906       tree_ssa_iv_optimize_loop (&data, loop);
6907     }
6908
6909   tree_ssa_iv_optimize_finalize (&data);
6910 }