gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "stor-layout.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "pointer-set.h"
  74 #include "hash-table.h"
  75 #include "tree-ssa-alias.h"
  76 #include "internal-fn.h"
  77 #include "tree-eh.h"
  78 #include "gimple-expr.h"
  79 #include "is-a.h"
  80 #include "gimple.h"
  81 #include "gimplify.h"
  82 #include "gimple-iterator.h"
  83 #include "gimplify-me.h"
  84 #include "gimple-ssa.h"
  85 #include "cgraph.h"
  86 #include "tree-cfg.h"
  87 #include "tree-phinodes.h"
  88 #include "ssa-iterators.h"
  89 #include "stringpool.h"
  90 #include "tree-ssanames.h"
  91 #include "tree-ssa-loop-ivopts.h"
  92 #include "tree-ssa-loop-manip.h"
  93 #include "tree-ssa-loop-niter.h"
  94 #include "tree-ssa-loop.h"
  95 #include "expr.h"
  96 #include "tree-dfa.h"
  97 #include "tree-ssa.h"
  98 #include "cfgloop.h"
  99 #include "tree-pass.h"
 100 #include "insn-config.h"
 101 #include "tree-chrec.h"
 102 #include "tree-scalar-evolution.h"
 103 #include "cfgloop.h"
 104 #include "params.h"
 105 #include "langhooks.h"
 106 #include "tree-affine.h"
 107 #include "target.h"
 108 #include "tree-inline.h"
 109 #include "tree-ssa-propagate.h"
 110 #include "expmed.h"
 111 #include "tree-ssa-address.h"
 112
 113 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 114    cost of different addressing modes.  This should be moved to a TBD
 115    interface between the GIMPLE and RTL worlds.  */
 116 #include "expr.h"
 117 #include "recog.h"
 118
 119 /* The infinite cost.  */
 120 #define INFTY 10000000
 121
 122 #define AVG_LOOP_NITER(LOOP) 5
 123
 124 /* Returns the expected number of loop iterations for LOOP.
 125    The average trip count is computed from profile data if it
 126    exists. */
 127
 128 static inline HOST_WIDE_INT
 129 avg_loop_niter (struct loop *loop)
 130 {
 131   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 132   if (niter == -1)
 133     return AVG_LOOP_NITER (loop);
 134
 135   return niter;
 136 }
 137
 138 /* Representation of the induction variable.  */
 139 struct iv
 140 {
 141   tree base;            /* Initial value of the iv.  */
 142   tree base_object;     /* A memory object to that the induction variable points.  */
 143   tree step;            /* Step of the iv (constant only).  */
 144   tree ssa_name;        /* The ssa name with the value.  */
 145   bool biv_p;           /* Is it a biv?  */
 146   bool have_use_for;    /* Do we already have a use for it?  */
 147   unsigned use_id;      /* The identifier in the use if it is the case.  */
 148 };
 149
 150 /* Per-ssa version information (induction variable descriptions, etc.).  */
 151 struct version_info
 152 {
 153   tree name;            /* The ssa name.  */
 154   struct iv *iv;        /* Induction variable description.  */
 155   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 156                            an expression that is not an induction variable.  */
 157   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 158   unsigned inv_id;      /* Id of an invariant.  */
 159 };
 160
 161 /* Types of uses.  */
 162 enum use_type
 163 {
 164   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 165   USE_ADDRESS,          /* Use in an address.  */
 166   USE_COMPARE           /* Use is a compare.  */
 167 };
 168
 169 /* Cost of a computation.  */
 170 typedef struct
 171 {
 172   int cost;             /* The runtime cost.  */
 173   unsigned complexity;  /* The estimate of the complexity of the code for
 174                            the computation (in no concrete units --
 175                            complexity field should be larger for more
 176                            complex expressions and addressing modes).  */
 177 } comp_cost;
 178
 179 static const comp_cost no_cost = {0, 0};
 180 static const comp_cost infinite_cost = {INFTY, INFTY};
 181
 182 /* The candidate - cost pair.  */
 183 struct cost_pair
 184 {
 185   struct iv_cand *cand; /* The candidate.  */
 186   comp_cost cost;       /* The cost.  */
 187   bitmap depends_on;    /* The list of invariants that have to be
 188                            preserved.  */
 189   tree value;           /* For final value elimination, the expression for
 190                            the final value of the iv.  For iv elimination,
 191                            the new bound to compare with.  */
 192   enum tree_code comp;  /* For iv elimination, the comparison.  */
 193   int inv_expr_id;      /* Loop invariant expression id.  */
 194 };
 195
 196 /* Use.  */
 197 struct iv_use
 198 {
 199   unsigned id;          /* The id of the use.  */
 200   enum use_type type;   /* Type of the use.  */
 201   struct iv *iv;        /* The induction variable it is based on.  */
 202   gimple stmt;          /* Statement in that it occurs.  */
 203   tree *op_p;           /* The place where it occurs.  */
 204   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 205                            important ones.  */
 206
 207   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 208   struct cost_pair *cost_map;
 209                         /* The costs wrto the iv candidates.  */
 210
 211   struct iv_cand *selected;
 212                         /* The selected candidate.  */
 213 };
 214
 215 /* The position where the iv is computed.  */
 216 enum iv_position
 217 {
 218   IP_NORMAL,            /* At the end, just before the exit condition.  */
 219   IP_END,               /* At the end of the latch block.  */
 220   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 221   IP_AFTER_USE,         /* Immediately after a specific use.  */
 222   IP_ORIGINAL           /* The original biv.  */
 223 };
 224
 225 /* The induction variable candidate.  */
 226 struct iv_cand
 227 {
 228   unsigned id;          /* The number of the candidate.  */
 229   bool important;       /* Whether this is an "important" candidate, i.e. such
 230                            that it should be considered by all uses.  */
 231   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 232   gimple incremented_at;/* For original biv, the statement where it is
 233                            incremented.  */
 234   tree var_before;      /* The variable used for it before increment.  */
 235   tree var_after;       /* The variable used for it after increment.  */
 236   struct iv *iv;        /* The value of the candidate.  NULL for
 237                            "pseudocandidate" used to indicate the possibility
 238                            to replace the final value of an iv by direct
 239                            computation of the value.  */
 240   unsigned cost;        /* Cost of the candidate.  */
 241   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 242   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 243                               where it is incremented.  */
 244   bitmap depends_on;    /* The list of invariants that are used in step of the
 245                            biv.  */
 246 };
 247
 248 /* Loop invariant expression hashtable entry.  */
 249 struct iv_inv_expr_ent
 250 {
 251   tree expr;
 252   int id;
 253   hashval_t hash;
 254 };
 255
 256 /* The data used by the induction variable optimizations.  */
 257
 258 typedef struct iv_use *iv_use_p;
 259
 260 typedef struct iv_cand *iv_cand_p;
 261
 262 /* Hashtable helpers.  */
 263
 264 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 265 {
 266   typedef iv_inv_expr_ent value_type;
 267   typedef iv_inv_expr_ent compare_type;
 268   static inline hashval_t hash (const value_type *);
 269   static inline bool equal (const value_type *, const compare_type *);
 270 };
 271
 272 /* Hash function for loop invariant expressions.  */
 273
 274 inline hashval_t
 275 iv_inv_expr_hasher::hash (const value_type *expr)
 276 {
 277   return expr->hash;
 278 }
 279
 280 /* Hash table equality function for expressions.  */
 281
 282 inline bool
 283 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 284 {
 285   return expr1->hash == expr2->hash
 286          && operand_equal_p (expr1->expr, expr2->expr, 0);
 287 }
 288
 289 struct ivopts_data
 290 {
 291   /* The currently optimized loop.  */
 292   struct loop *current_loop;
 293
 294   /* Numbers of iterations for all exits of the current loop.  */
 295   struct pointer_map_t *niters;
 296
 297   /* Number of registers used in it.  */
 298   unsigned regs_used;
 299
 300   /* The size of version_info array allocated.  */
 301   unsigned version_info_size;
 302
 303   /* The array of information for the ssa names.  */
 304   struct version_info *version_info;
 305
 306   /* The hashtable of loop invariant expressions created
 307      by ivopt.  */
 308   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 309
 310   /* Loop invariant expression id.  */
 311   int inv_expr_id;
 312
 313   /* The bitmap of indices in version_info whose value was changed.  */
 314   bitmap relevant;
 315
 316   /* The uses of induction variables.  */
 317   vec<iv_use_p> iv_uses;
 318
 319   /* The candidates.  */
 320   vec<iv_cand_p> iv_candidates;
 321
 322   /* A bitmap of important candidates.  */
 323   bitmap important_candidates;
 324
 325   /* The maximum invariant id.  */
 326   unsigned max_inv_id;
 327
 328   /* Whether to consider just related and important candidates when replacing a
 329      use.  */
 330   bool consider_all_candidates;
 331
 332   /* Are we optimizing for speed?  */
 333   bool speed;
 334
 335   /* Whether the loop body includes any function calls.  */
 336   bool body_includes_call;
 337
 338   /* Whether the loop body can only be exited via single exit.  */
 339   bool loop_single_exit_p;
 340 };
 341
 342 /* An assignment of iv candidates to uses.  */
 343
 344 struct iv_ca
 345 {
 346   /* The number of uses covered by the assignment.  */
 347   unsigned upto;
 348
 349   /* Number of uses that cannot be expressed by the candidates in the set.  */
 350   unsigned bad_uses;
 351
 352   /* Candidate assigned to a use, together with the related costs.  */
 353   struct cost_pair **cand_for_use;
 354
 355   /* Number of times each candidate is used.  */
 356   unsigned *n_cand_uses;
 357
 358   /* The candidates used.  */
 359   bitmap cands;
 360
 361   /* The number of candidates in the set.  */
 362   unsigned n_cands;
 363
 364   /* Total number of registers needed.  */
 365   unsigned n_regs;
 366
 367   /* Total cost of expressing uses.  */
 368   comp_cost cand_use_cost;
 369
 370   /* Total cost of candidates.  */
 371   unsigned cand_cost;
 372
 373   /* Number of times each invariant is used.  */
 374   unsigned *n_invariant_uses;
 375
 376   /* The array holding the number of uses of each loop
 377      invariant expressions created by ivopt.  */
 378   unsigned *used_inv_expr;
 379
 380   /* The number of created loop invariants.  */
 381   unsigned num_used_inv_expr;
 382
 383   /* Total cost of the assignment.  */
 384   comp_cost cost;
 385 };
 386
 387 /* Difference of two iv candidate assignments.  */
 388
 389 struct iv_ca_delta
 390 {
 391   /* Changed use.  */
 392   struct iv_use *use;
 393
 394   /* An old assignment (for rollback purposes).  */
 395   struct cost_pair *old_cp;
 396
 397   /* A new assignment.  */
 398   struct cost_pair *new_cp;
 399
 400   /* Next change in the list.  */
 401   struct iv_ca_delta *next_change;
 402 };
 403
 404 /* Bound on number of candidates below that all candidates are considered.  */
 405
 406 #define CONSIDER_ALL_CANDIDATES_BOUND \
 407   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 408
 409 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 410    optimizing such a loop would help, and it would take ages).  */
 411
 412 #define MAX_CONSIDERED_USES \
 413   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 414
 415 /* If there are at most this number of ivs in the set, try removing unnecessary
 416    ivs from the set always.  */
 417
 418 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 419   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 420
 421 /* The list of trees for that the decl_rtl field must be reset is stored
 422    here.  */
 423
 424 static vec<tree> decl_rtl_to_reset;
 425
 426 static comp_cost force_expr_to_var_cost (tree, bool);
 427
 428 /* Number of uses recorded in DATA.  */
 429
 430 static inline unsigned
 431 n_iv_uses (struct ivopts_data *data)
 432 {
 433   return data->iv_uses.length ();
 434 }
 435
 436 /* Ith use recorded in DATA.  */
 437
 438 static inline struct iv_use *
 439 iv_use (struct ivopts_data *data, unsigned i)
 440 {
 441   return data->iv_uses[i];
 442 }
 443
 444 /* Number of candidates recorded in DATA.  */
 445
 446 static inline unsigned
 447 n_iv_cands (struct ivopts_data *data)
 448 {
 449   return data->iv_candidates.length ();
 450 }
 451
 452 /* Ith candidate recorded in DATA.  */
 453
 454 static inline struct iv_cand *
 455 iv_cand (struct ivopts_data *data, unsigned i)
 456 {
 457   return data->iv_candidates[i];
 458 }
 459
 460 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 461
 462 edge
 463 single_dom_exit (struct loop *loop)
 464 {
 465   edge exit = single_exit (loop);
 466
 467   if (!exit)
 468     return NULL;
 469
 470   if (!just_once_each_iteration_p (loop, exit->src))
 471     return NULL;
 472
 473   return exit;
 474 }
 475
 476 /* Dumps information about the induction variable IV to FILE.  */
 477
 478 void
 479 dump_iv (FILE *file, struct iv *iv)
 480 {
 481   if (iv->ssa_name)
 482     {
 483       fprintf (file, "ssa name ");
 484       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 485       fprintf (file, "\n");
 486     }
 487
 488   fprintf (file, "  type ");
 489   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 490   fprintf (file, "\n");
 491
 492   if (iv->step)
 493     {
 494       fprintf (file, "  base ");
 495       print_generic_expr (file, iv->base, TDF_SLIM);
 496       fprintf (file, "\n");
 497
 498       fprintf (file, "  step ");
 499       print_generic_expr (file, iv->step, TDF_SLIM);
 500       fprintf (file, "\n");
 501     }
 502   else
 503     {
 504       fprintf (file, "  invariant ");
 505       print_generic_expr (file, iv->base, TDF_SLIM);
 506       fprintf (file, "\n");
 507     }
 508
 509   if (iv->base_object)
 510     {
 511       fprintf (file, "  base object ");
 512       print_generic_expr (file, iv->base_object, TDF_SLIM);
 513       fprintf (file, "\n");
 514     }
 515
 516   if (iv->biv_p)
 517     fprintf (file, "  is a biv\n");
 518 }
 519
 520 /* Dumps information about the USE to FILE.  */
 521
 522 void
 523 dump_use (FILE *file, struct iv_use *use)
 524 {
 525   fprintf (file, "use %d\n", use->id);
 526
 527   switch (use->type)
 528     {
 529     case USE_NONLINEAR_EXPR:
 530       fprintf (file, "  generic\n");
 531       break;
 532
 533     case USE_ADDRESS:
 534       fprintf (file, "  address\n");
 535       break;
 536
 537     case USE_COMPARE:
 538       fprintf (file, "  compare\n");
 539       break;
 540
 541     default:
 542       gcc_unreachable ();
 543     }
 544
 545   fprintf (file, "  in statement ");
 546   print_gimple_stmt (file, use->stmt, 0, 0);
 547   fprintf (file, "\n");
 548
 549   fprintf (file, "  at position ");
 550   if (use->op_p)
 551     print_generic_expr (file, *use->op_p, TDF_SLIM);
 552   fprintf (file, "\n");
 553
 554   dump_iv (file, use->iv);
 555
 556   if (use->related_cands)
 557     {
 558       fprintf (file, "  related candidates ");
 559       dump_bitmap (file, use->related_cands);
 560     }
 561 }
 562
 563 /* Dumps information about the uses to FILE.  */
 564
 565 void
 566 dump_uses (FILE *file, struct ivopts_data *data)
 567 {
 568   unsigned i;
 569   struct iv_use *use;
 570
 571   for (i = 0; i < n_iv_uses (data); i++)
 572     {
 573       use = iv_use (data, i);
 574
 575       dump_use (file, use);
 576       fprintf (file, "\n");
 577     }
 578 }
 579
 580 /* Dumps information about induction variable candidate CAND to FILE.  */
 581
 582 void
 583 dump_cand (FILE *file, struct iv_cand *cand)
 584 {
 585   struct iv *iv = cand->iv;
 586
 587   fprintf (file, "candidate %d%s\n",
 588            cand->id, cand->important ? " (important)" : "");
 589
 590   if (cand->depends_on)
 591     {
 592       fprintf (file, "  depends on ");
 593       dump_bitmap (file, cand->depends_on);
 594     }
 595
 596   if (!iv)
 597     {
 598       fprintf (file, "  final value replacement\n");
 599       return;
 600     }
 601
 602   if (cand->var_before)
 603     {
 604       fprintf (file, "  var_before ");
 605       print_generic_expr (file, cand->var_before, TDF_SLIM);
 606       fprintf (file, "\n");
 607     }
 608   if (cand->var_after)
 609     {
 610       fprintf (file, "  var_after ");
 611       print_generic_expr (file, cand->var_after, TDF_SLIM);
 612       fprintf (file, "\n");
 613     }
 614
 615   switch (cand->pos)
 616     {
 617     case IP_NORMAL:
 618       fprintf (file, "  incremented before exit test\n");
 619       break;
 620
 621     case IP_BEFORE_USE:
 622       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 623       break;
 624
 625     case IP_AFTER_USE:
 626       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 627       break;
 628
 629     case IP_END:
 630       fprintf (file, "  incremented at end\n");
 631       break;
 632
 633     case IP_ORIGINAL:
 634       fprintf (file, "  original biv\n");
 635       break;
 636     }
 637
 638   dump_iv (file, iv);
 639 }
 640
 641 /* Returns the info for ssa version VER.  */
 642
 643 static inline struct version_info *
 644 ver_info (struct ivopts_data *data, unsigned ver)
 645 {
 646   return data->version_info + ver;
 647 }
 648
 649 /* Returns the info for ssa name NAME.  */
 650
 651 static inline struct version_info *
 652 name_info (struct ivopts_data *data, tree name)
 653 {
 654   return ver_info (data, SSA_NAME_VERSION (name));
 655 }
 656
 657 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 658    emitted in LOOP.  */
 659
 660 static bool
 661 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 662 {
 663   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 664
 665   gcc_assert (bb);
 666
 667   if (sbb == loop->latch)
 668     return true;
 669
 670   if (sbb != bb)
 671     return false;
 672
 673   return stmt == last_stmt (bb);
 674 }
 675
 676 /* Returns true if STMT if after the place where the original induction
 677    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 678    if the positions are identical.  */
 679
 680 static bool
 681 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 682 {
 683   basic_block cand_bb = gimple_bb (cand->incremented_at);
 684   basic_block stmt_bb = gimple_bb (stmt);
 685
 686   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 687     return false;
 688
 689   if (stmt_bb != cand_bb)
 690     return true;
 691
 692   if (true_if_equal
 693       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 694     return true;
 695   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 696 }
 697
 698 /* Returns true if STMT if after the place where the induction variable
 699    CAND is incremented in LOOP.  */
 700
 701 static bool
 702 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 703 {
 704   switch (cand->pos)
 705     {
 706     case IP_END:
 707       return false;
 708
 709     case IP_NORMAL:
 710       return stmt_after_ip_normal_pos (loop, stmt);
 711
 712     case IP_ORIGINAL:
 713     case IP_AFTER_USE:
 714       return stmt_after_inc_pos (cand, stmt, false);
 715
 716     case IP_BEFORE_USE:
 717       return stmt_after_inc_pos (cand, stmt, true);
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722 }
 723
 724 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 725
 726 static bool
 727 abnormal_ssa_name_p (tree exp)
 728 {
 729   if (!exp)
 730     return false;
 731
 732   if (TREE_CODE (exp) != SSA_NAME)
 733     return false;
 734
 735   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 736 }
 737
 738 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 739    abnormal phi node.  Callback for for_each_index.  */
 740
 741 static bool
 742 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 743                                   void *data ATTRIBUTE_UNUSED)
 744 {
 745   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 746     {
 747       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 748         return false;
 749       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 750         return false;
 751     }
 752
 753   return !abnormal_ssa_name_p (*index);
 754 }
 755
 756 /* Returns true if EXPR contains a ssa name that occurs in an
 757    abnormal phi node.  */
 758
 759 bool
 760 contains_abnormal_ssa_name_p (tree expr)
 761 {
 762   enum tree_code code;
 763   enum tree_code_class codeclass;
 764
 765   if (!expr)
 766     return false;
 767
 768   code = TREE_CODE (expr);
 769   codeclass = TREE_CODE_CLASS (code);
 770
 771   if (code == SSA_NAME)
 772     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 773
 774   if (code == INTEGER_CST
 775       || is_gimple_min_invariant (expr))
 776     return false;
 777
 778   if (code == ADDR_EXPR)
 779     return !for_each_index (&TREE_OPERAND (expr, 0),
 780                             idx_contains_abnormal_ssa_name_p,
 781                             NULL);
 782
 783   if (code == COND_EXPR)
 784     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 785       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 786       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 787
 788   switch (codeclass)
 789     {
 790     case tcc_binary:
 791     case tcc_comparison:
 792       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 793         return true;
 794
 795       /* Fallthru.  */
 796     case tcc_unary:
 797       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 798         return true;
 799
 800       break;
 801
 802     default:
 803       gcc_unreachable ();
 804     }
 805
 806   return false;
 807 }
 808
 809 /*  Returns the structure describing number of iterations determined from
 810     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 811
 812 static struct tree_niter_desc *
 813 niter_for_exit (struct ivopts_data *data, edge exit)
 814 {
 815   struct tree_niter_desc *desc;
 816   void **slot;
 817
 818   if (!data->niters)
 819     {
 820       data->niters = pointer_map_create ();
 821       slot = NULL;
 822     }
 823   else
 824     slot = pointer_map_contains (data->niters, exit);
 825
 826   if (!slot)
 827     {
 828       /* Try to determine number of iterations.  We cannot safely work with ssa
 829          names that appear in phi nodes on abnormal edges, so that we do not
 830          create overlapping life ranges for them (PR 27283).  */
 831       desc = XNEW (struct tree_niter_desc);
 832       if (!number_of_iterations_exit (data->current_loop,
 833                                       exit, desc, true)
 834           || contains_abnormal_ssa_name_p (desc->niter))
 835         {
 836           XDELETE (desc);
 837           desc = NULL;
 838         }
 839       slot = pointer_map_insert (data->niters, exit);
 840       *slot = desc;
 841     }
 842   else
 843     desc = (struct tree_niter_desc *) *slot;
 844
 845   return desc;
 846 }
 847
 848 /* Returns the structure describing number of iterations determined from
 849    single dominating exit of DATA->current_loop, or NULL if something
 850    goes wrong.  */
 851
 852 static struct tree_niter_desc *
 853 niter_for_single_dom_exit (struct ivopts_data *data)
 854 {
 855   edge exit = single_dom_exit (data->current_loop);
 856
 857   if (!exit)
 858     return NULL;
 859
 860   return niter_for_exit (data, exit);
 861 }
 862
 863 /* Initializes data structures used by the iv optimization pass, stored
 864    in DATA.  */
 865
 866 static void
 867 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 868 {
 869   data->version_info_size = 2 * num_ssa_names;
 870   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 871   data->relevant = BITMAP_ALLOC (NULL);
 872   data->important_candidates = BITMAP_ALLOC (NULL);
 873   data->max_inv_id = 0;
 874   data->niters = NULL;
 875   data->iv_uses.create (20);
 876   data->iv_candidates.create (20);
 877   data->inv_expr_tab.create (10);
 878   data->inv_expr_id = 0;
 879   decl_rtl_to_reset.create (20);
 880 }
 881
 882 /* Returns a memory object to that EXPR points.  In case we are able to
 883    determine that it does not point to any such object, NULL is returned.  */
 884
 885 static tree
 886 determine_base_object (tree expr)
 887 {
 888   enum tree_code code = TREE_CODE (expr);
 889   tree base, obj;
 890
 891   /* If this is a pointer casted to any type, we need to determine
 892      the base object for the pointer; so handle conversions before
 893      throwing away non-pointer expressions.  */
 894   if (CONVERT_EXPR_P (expr))
 895     return determine_base_object (TREE_OPERAND (expr, 0));
 896
 897   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 898     return NULL_TREE;
 899
 900   switch (code)
 901     {
 902     case INTEGER_CST:
 903       return NULL_TREE;
 904
 905     case ADDR_EXPR:
 906       obj = TREE_OPERAND (expr, 0);
 907       base = get_base_address (obj);
 908
 909       if (!base)
 910         return expr;
 911
 912       if (TREE_CODE (base) == MEM_REF)
 913         return determine_base_object (TREE_OPERAND (base, 0));
 914
 915       return fold_convert (ptr_type_node,
 916                            build_fold_addr_expr (base));
 917
 918     case POINTER_PLUS_EXPR:
 919       return determine_base_object (TREE_OPERAND (expr, 0));
 920
 921     case PLUS_EXPR:
 922     case MINUS_EXPR:
 923       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 924       gcc_unreachable ();
 925
 926     default:
 927       return fold_convert (ptr_type_node, expr);
 928     }
 929 }
 930
 931 /* Allocates an induction variable with given initial value BASE and step STEP
 932    for loop LOOP.  */
 933
 934 static struct iv *
 935 alloc_iv (tree base, tree step)
 936 {
 937   tree base_object = base;
 938   struct iv *iv = XCNEW (struct iv);
 939   gcc_assert (step != NULL_TREE);
 940
 941   /* Lower all address expressions except ones with DECL_P as operand.
 942      By doing this:
 943        1) More accurate cost can be computed for address expressions;
 944        2) Duplicate candidates won't be created for bases in different
 945           forms, like &a[0] and &a.  */
 946   STRIP_NOPS (base_object);
 947   if (TREE_CODE (base_object) == ADDR_EXPR
 948       && !DECL_P (TREE_OPERAND (base_object, 0)))
 949     {
 950       aff_tree comb;
 951       double_int size;
 952       base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
 953                                              &comb, &size);
 954       gcc_assert (base_object != NULL_TREE);
 955       base_object = build_fold_addr_expr (base_object);
 956       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 957     }
 958
 959   iv->base = base;
 960   iv->base_object = determine_base_object (base_object);
 961   iv->step = step;
 962   iv->biv_p = false;
 963   iv->have_use_for = false;
 964   iv->use_id = 0;
 965   iv->ssa_name = NULL_TREE;
 966
 967   return iv;
 968 }
 969
 970 /* Sets STEP and BASE for induction variable IV.  */
 971
 972 static void
 973 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 974 {
 975   struct version_info *info = name_info (data, iv);
 976
 977   gcc_assert (!info->iv);
 978
 979   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 980   info->iv = alloc_iv (base, step);
 981   info->iv->ssa_name = iv;
 982 }
 983
 984 /* Finds induction variable declaration for VAR.  */
 985
 986 static struct iv *
 987 get_iv (struct ivopts_data *data, tree var)
 988 {
 989   basic_block bb;
 990   tree type = TREE_TYPE (var);
 991
 992   if (!POINTER_TYPE_P (type)
 993       && !INTEGRAL_TYPE_P (type))
 994     return NULL;
 995
 996   if (!name_info (data, var)->iv)
 997     {
 998       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 999
1000       if (!bb
1001           || !flow_bb_inside_loop_p (data->current_loop, bb))
1002         set_iv (data, var, var, build_int_cst (type, 0));
1003     }
1004
1005   return name_info (data, var)->iv;
1006 }
1007
1008 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1009    not define a simple affine biv with nonzero step.  */
1010
1011 static tree
1012 determine_biv_step (gimple phi)
1013 {
1014   struct loop *loop = gimple_bb (phi)->loop_father;
1015   tree name = PHI_RESULT (phi);
1016   affine_iv iv;
1017
1018   if (virtual_operand_p (name))
1019     return NULL_TREE;
1020
1021   if (!simple_iv (loop, loop, name, &iv, true))
1022     return NULL_TREE;
1023
1024   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1025 }
1026
1027 /* Finds basic ivs.  */
1028
1029 static bool
1030 find_bivs (struct ivopts_data *data)
1031 {
1032   gimple phi;
1033   tree step, type, base;
1034   bool found = false;
1035   struct loop *loop = data->current_loop;
1036   gimple_stmt_iterator psi;
1037
1038   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1039     {
1040       phi = gsi_stmt (psi);
1041
1042       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1043         continue;
1044
1045       step = determine_biv_step (phi);
1046       if (!step)
1047         continue;
1048
1049       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1050       base = expand_simple_operations (base);
1051       if (contains_abnormal_ssa_name_p (base)
1052           || contains_abnormal_ssa_name_p (step))
1053         continue;
1054
1055       type = TREE_TYPE (PHI_RESULT (phi));
1056       base = fold_convert (type, base);
1057       if (step)
1058         {
1059           if (POINTER_TYPE_P (type))
1060             step = convert_to_ptrofftype (step);
1061           else
1062             step = fold_convert (type, step);
1063         }
1064
1065       set_iv (data, PHI_RESULT (phi), base, step);
1066       found = true;
1067     }
1068
1069   return found;
1070 }
1071
1072 /* Marks basic ivs.  */
1073
1074 static void
1075 mark_bivs (struct ivopts_data *data)
1076 {
1077   gimple phi;
1078   tree var;
1079   struct iv *iv, *incr_iv;
1080   struct loop *loop = data->current_loop;
1081   basic_block incr_bb;
1082   gimple_stmt_iterator psi;
1083
1084   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1085     {
1086       phi = gsi_stmt (psi);
1087
1088       iv = get_iv (data, PHI_RESULT (phi));
1089       if (!iv)
1090         continue;
1091
1092       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1093       incr_iv = get_iv (data, var);
1094       if (!incr_iv)
1095         continue;
1096
1097       /* If the increment is in the subloop, ignore it.  */
1098       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1099       if (incr_bb->loop_father != data->current_loop
1100           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1101         continue;
1102
1103       iv->biv_p = true;
1104       incr_iv->biv_p = true;
1105     }
1106 }
1107
1108 /* Checks whether STMT defines a linear induction variable and stores its
1109    parameters to IV.  */
1110
1111 static bool
1112 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1113 {
1114   tree lhs;
1115   struct loop *loop = data->current_loop;
1116
1117   iv->base = NULL_TREE;
1118   iv->step = NULL_TREE;
1119
1120   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1121     return false;
1122
1123   lhs = gimple_assign_lhs (stmt);
1124   if (TREE_CODE (lhs) != SSA_NAME)
1125     return false;
1126
1127   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1128     return false;
1129   iv->base = expand_simple_operations (iv->base);
1130
1131   if (contains_abnormal_ssa_name_p (iv->base)
1132       || contains_abnormal_ssa_name_p (iv->step))
1133     return false;
1134
1135   /* If STMT could throw, then do not consider STMT as defining a GIV.
1136      While this will suppress optimizations, we can not safely delete this
1137      GIV and associated statements, even if it appears it is not used.  */
1138   if (stmt_could_throw_p (stmt))
1139     return false;
1140
1141   return true;
1142 }
1143
1144 /* Finds general ivs in statement STMT.  */
1145
1146 static void
1147 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1148 {
1149   affine_iv iv;
1150
1151   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1152     return;
1153
1154   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1155 }
1156
1157 /* Finds general ivs in basic block BB.  */
1158
1159 static void
1160 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1161 {
1162   gimple_stmt_iterator bsi;
1163
1164   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1165     find_givs_in_stmt (data, gsi_stmt (bsi));
1166 }
1167
1168 /* Finds general ivs.  */
1169
1170 static void
1171 find_givs (struct ivopts_data *data)
1172 {
1173   struct loop *loop = data->current_loop;
1174   basic_block *body = get_loop_body_in_dom_order (loop);
1175   unsigned i;
1176
1177   for (i = 0; i < loop->num_nodes; i++)
1178     find_givs_in_bb (data, body[i]);
1179   free (body);
1180 }
1181
1182 /* For each ssa name defined in LOOP determines whether it is an induction
1183    variable and if so, its initial value and step.  */
1184
1185 static bool
1186 find_induction_variables (struct ivopts_data *data)
1187 {
1188   unsigned i;
1189   bitmap_iterator bi;
1190
1191   if (!find_bivs (data))
1192     return false;
1193
1194   find_givs (data);
1195   mark_bivs (data);
1196
1197   if (dump_file && (dump_flags & TDF_DETAILS))
1198     {
1199       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1200
1201       if (niter)
1202         {
1203           fprintf (dump_file, "  number of iterations ");
1204           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1205           if (!integer_zerop (niter->may_be_zero))
1206             {
1207               fprintf (dump_file, "; zero if ");
1208               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1209             }
1210           fprintf (dump_file, "\n\n");
1211         };
1212
1213       fprintf (dump_file, "Induction variables:\n\n");
1214
1215       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1216         {
1217           if (ver_info (data, i)->iv)
1218             dump_iv (dump_file, ver_info (data, i)->iv);
1219         }
1220     }
1221
1222   return true;
1223 }
1224
1225 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1226
1227 static struct iv_use *
1228 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1229             gimple stmt, enum use_type use_type)
1230 {
1231   struct iv_use *use = XCNEW (struct iv_use);
1232
1233   use->id = n_iv_uses (data);
1234   use->type = use_type;
1235   use->iv = iv;
1236   use->stmt = stmt;
1237   use->op_p = use_p;
1238   use->related_cands = BITMAP_ALLOC (NULL);
1239
1240   /* To avoid showing ssa name in the dumps, if it was not reset by the
1241      caller.  */
1242   iv->ssa_name = NULL_TREE;
1243
1244   if (dump_file && (dump_flags & TDF_DETAILS))
1245     dump_use (dump_file, use);
1246
1247   data->iv_uses.safe_push (use);
1248
1249   return use;
1250 }
1251
1252 /* Checks whether OP is a loop-level invariant and if so, records it.
1253    NONLINEAR_USE is true if the invariant is used in a way we do not
1254    handle specially.  */
1255
1256 static void
1257 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1258 {
1259   basic_block bb;
1260   struct version_info *info;
1261
1262   if (TREE_CODE (op) != SSA_NAME
1263       || virtual_operand_p (op))
1264     return;
1265
1266   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1267   if (bb
1268       && flow_bb_inside_loop_p (data->current_loop, bb))
1269     return;
1270
1271   info = name_info (data, op);
1272   info->name = op;
1273   info->has_nonlin_use |= nonlinear_use;
1274   if (!info->inv_id)
1275     info->inv_id = ++data->max_inv_id;
1276   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1277 }
1278
1279 /* Checks whether the use OP is interesting and if so, records it.  */
1280
1281 static struct iv_use *
1282 find_interesting_uses_op (struct ivopts_data *data, tree op)
1283 {
1284   struct iv *iv;
1285   struct iv *civ;
1286   gimple stmt;
1287   struct iv_use *use;
1288
1289   if (TREE_CODE (op) != SSA_NAME)
1290     return NULL;
1291
1292   iv = get_iv (data, op);
1293   if (!iv)
1294     return NULL;
1295
1296   if (iv->have_use_for)
1297     {
1298       use = iv_use (data, iv->use_id);
1299
1300       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1301       return use;
1302     }
1303
1304   if (integer_zerop (iv->step))
1305     {
1306       record_invariant (data, op, true);
1307       return NULL;
1308     }
1309   iv->have_use_for = true;
1310
1311   civ = XNEW (struct iv);
1312   *civ = *iv;
1313
1314   stmt = SSA_NAME_DEF_STMT (op);
1315   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1316               || is_gimple_assign (stmt));
1317
1318   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1319   iv->use_id = use->id;
1320
1321   return use;
1322 }
1323
1324 /* Given a condition in statement STMT, checks whether it is a compare
1325    of an induction variable and an invariant.  If this is the case,
1326    CONTROL_VAR is set to location of the iv, BOUND to the location of
1327    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1328    induction variable descriptions, and true is returned.  If this is not
1329    the case, CONTROL_VAR and BOUND are set to the arguments of the
1330    condition and false is returned.  */
1331
1332 static bool
1333 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1334                        tree **control_var, tree **bound,
1335                        struct iv **iv_var, struct iv **iv_bound)
1336 {
1337   /* The objects returned when COND has constant operands.  */
1338   static struct iv const_iv;
1339   static tree zero;
1340   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1341   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1342   bool ret = false;
1343
1344   if (gimple_code (stmt) == GIMPLE_COND)
1345     {
1346       op0 = gimple_cond_lhs_ptr (stmt);
1347       op1 = gimple_cond_rhs_ptr (stmt);
1348     }
1349   else
1350     {
1351       op0 = gimple_assign_rhs1_ptr (stmt);
1352       op1 = gimple_assign_rhs2_ptr (stmt);
1353     }
1354
1355   zero = integer_zero_node;
1356   const_iv.step = integer_zero_node;
1357
1358   if (TREE_CODE (*op0) == SSA_NAME)
1359     iv0 = get_iv (data, *op0);
1360   if (TREE_CODE (*op1) == SSA_NAME)
1361     iv1 = get_iv (data, *op1);
1362
1363   /* Exactly one of the compared values must be an iv, and the other one must
1364      be an invariant.  */
1365   if (!iv0 || !iv1)
1366     goto end;
1367
1368   if (integer_zerop (iv0->step))
1369     {
1370       /* Control variable may be on the other side.  */
1371       tmp_op = op0; op0 = op1; op1 = tmp_op;
1372       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1373     }
1374   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1375
1376 end:
1377   if (control_var)
1378     *control_var = op0;;
1379   if (iv_var)
1380     *iv_var = iv0;;
1381   if (bound)
1382     *bound = op1;
1383   if (iv_bound)
1384     *iv_bound = iv1;
1385
1386   return ret;
1387 }
1388
1389 /* Checks whether the condition in STMT is interesting and if so,
1390    records it.  */
1391
1392 static void
1393 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1394 {
1395   tree *var_p, *bound_p;
1396   struct iv *var_iv, *civ;
1397
1398   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1399     {
1400       find_interesting_uses_op (data, *var_p);
1401       find_interesting_uses_op (data, *bound_p);
1402       return;
1403     }
1404
1405   civ = XNEW (struct iv);
1406   *civ = *var_iv;
1407   record_use (data, NULL, civ, stmt, USE_COMPARE);
1408 }
1409
1410 /* Returns the outermost loop EXPR is obviously invariant in
1411    relative to the loop LOOP, i.e. if all its operands are defined
1412    outside of the returned loop.  Returns NULL if EXPR is not
1413    even obviously invariant in LOOP.  */
1414
1415 struct loop *
1416 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1417 {
1418   basic_block def_bb;
1419   unsigned i, len;
1420
1421   if (is_gimple_min_invariant (expr))
1422     return current_loops->tree_root;
1423
1424   if (TREE_CODE (expr) == SSA_NAME)
1425     {
1426       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1427       if (def_bb)
1428         {
1429           if (flow_bb_inside_loop_p (loop, def_bb))
1430             return NULL;
1431           return superloop_at_depth (loop,
1432                                      loop_depth (def_bb->loop_father) + 1);
1433         }
1434
1435       return current_loops->tree_root;
1436     }
1437
1438   if (!EXPR_P (expr))
1439     return NULL;
1440
1441   unsigned maxdepth = 0;
1442   len = TREE_OPERAND_LENGTH (expr);
1443   for (i = 0; i < len; i++)
1444     {
1445       struct loop *ivloop;
1446       if (!TREE_OPERAND (expr, i))
1447         continue;
1448
1449       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1450       if (!ivloop)
1451         return NULL;
1452       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1453     }
1454
1455   return superloop_at_depth (loop, maxdepth);
1456 }
1457
1458 /* Returns true if expression EXPR is obviously invariant in LOOP,
1459    i.e. if all its operands are defined outside of the LOOP.  LOOP
1460    should not be the function body.  */
1461
1462 bool
1463 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1464 {
1465   basic_block def_bb;
1466   unsigned i, len;
1467
1468   gcc_assert (loop_depth (loop) > 0);
1469
1470   if (is_gimple_min_invariant (expr))
1471     return true;
1472
1473   if (TREE_CODE (expr) == SSA_NAME)
1474     {
1475       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1476       if (def_bb
1477           && flow_bb_inside_loop_p (loop, def_bb))
1478         return false;
1479
1480       return true;
1481     }
1482
1483   if (!EXPR_P (expr))
1484     return false;
1485
1486   len = TREE_OPERAND_LENGTH (expr);
1487   for (i = 0; i < len; i++)
1488     if (TREE_OPERAND (expr, i)
1489         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1490       return false;
1491
1492   return true;
1493 }
1494
1495 /* Cumulates the steps of indices into DATA and replaces their values with the
1496    initial ones.  Returns false when the value of the index cannot be determined.
1497    Callback for for_each_index.  */
1498
1499 struct ifs_ivopts_data
1500 {
1501   struct ivopts_data *ivopts_data;
1502   gimple stmt;
1503   tree step;
1504 };
1505
1506 static bool
1507 idx_find_step (tree base, tree *idx, void *data)
1508 {
1509   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1510   struct iv *iv;
1511   tree step, iv_base, iv_step, lbound, off;
1512   struct loop *loop = dta->ivopts_data->current_loop;
1513
1514   /* If base is a component ref, require that the offset of the reference
1515      be invariant.  */
1516   if (TREE_CODE (base) == COMPONENT_REF)
1517     {
1518       off = component_ref_field_offset (base);
1519       return expr_invariant_in_loop_p (loop, off);
1520     }
1521
1522   /* If base is array, first check whether we will be able to move the
1523      reference out of the loop (in order to take its address in strength
1524      reduction).  In order for this to work we need both lower bound
1525      and step to be loop invariants.  */
1526   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1527     {
1528       /* Moreover, for a range, the size needs to be invariant as well.  */
1529       if (TREE_CODE (base) == ARRAY_RANGE_REF
1530           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1531         return false;
1532
1533       step = array_ref_element_size (base);
1534       lbound = array_ref_low_bound (base);
1535
1536       if (!expr_invariant_in_loop_p (loop, step)
1537           || !expr_invariant_in_loop_p (loop, lbound))
1538         return false;
1539     }
1540
1541   if (TREE_CODE (*idx) != SSA_NAME)
1542     return true;
1543
1544   iv = get_iv (dta->ivopts_data, *idx);
1545   if (!iv)
1546     return false;
1547
1548   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1549           *&x[0], which is not folded and does not trigger the
1550           ARRAY_REF path below.  */
1551   *idx = iv->base;
1552
1553   if (integer_zerop (iv->step))
1554     return true;
1555
1556   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1557     {
1558       step = array_ref_element_size (base);
1559
1560       /* We only handle addresses whose step is an integer constant.  */
1561       if (TREE_CODE (step) != INTEGER_CST)
1562         return false;
1563     }
1564   else
1565     /* The step for pointer arithmetics already is 1 byte.  */
1566     step = size_one_node;
1567
1568   iv_base = iv->base;
1569   iv_step = iv->step;
1570   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1571                             sizetype, &iv_base, &iv_step, dta->stmt,
1572                             false))
1573     {
1574       /* The index might wrap.  */
1575       return false;
1576     }
1577
1578   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1579   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1580
1581   return true;
1582 }
1583
1584 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1585    object is passed to it in DATA.  */
1586
1587 static bool
1588 idx_record_use (tree base, tree *idx,
1589                 void *vdata)
1590 {
1591   struct ivopts_data *data = (struct ivopts_data *) vdata;
1592   find_interesting_uses_op (data, *idx);
1593   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1594     {
1595       find_interesting_uses_op (data, array_ref_element_size (base));
1596       find_interesting_uses_op (data, array_ref_low_bound (base));
1597     }
1598   return true;
1599 }
1600
1601 /* If we can prove that TOP = cst * BOT for some constant cst,
1602    store cst to MUL and return true.  Otherwise return false.
1603    The returned value is always sign-extended, regardless of the
1604    signedness of TOP and BOT.  */
1605
1606 static bool
1607 constant_multiple_of (tree top, tree bot, double_int *mul)
1608 {
1609   tree mby;
1610   enum tree_code code;
1611   double_int res, p0, p1;
1612   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1613
1614   STRIP_NOPS (top);
1615   STRIP_NOPS (bot);
1616
1617   if (operand_equal_p (top, bot, 0))
1618     {
1619       *mul = double_int_one;
1620       return true;
1621     }
1622
1623   code = TREE_CODE (top);
1624   switch (code)
1625     {
1626     case MULT_EXPR:
1627       mby = TREE_OPERAND (top, 1);
1628       if (TREE_CODE (mby) != INTEGER_CST)
1629         return false;
1630
1631       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1632         return false;
1633
1634       *mul = (res * tree_to_double_int (mby)).sext (precision);
1635       return true;
1636
1637     case PLUS_EXPR:
1638     case MINUS_EXPR:
1639       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1640           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1641         return false;
1642
1643       if (code == MINUS_EXPR)
1644         p1 = -p1;
1645       *mul = (p0 + p1).sext (precision);
1646       return true;
1647
1648     case INTEGER_CST:
1649       if (TREE_CODE (bot) != INTEGER_CST)
1650         return false;
1651
1652       p0 = tree_to_double_int (top).sext (precision);
1653       p1 = tree_to_double_int (bot).sext (precision);
1654       if (p1.is_zero ())
1655         return false;
1656       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1657       return res.is_zero ();
1658
1659     default:
1660       return false;
1661     }
1662 }
1663
1664 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1665
1666 static bool
1667 may_be_unaligned_p (tree ref, tree step)
1668 {
1669   tree base;
1670   tree base_type;
1671   HOST_WIDE_INT bitsize;
1672   HOST_WIDE_INT bitpos;
1673   tree toffset;
1674   enum machine_mode mode;
1675   int unsignedp, volatilep;
1676   unsigned base_align;
1677
1678   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1679      thus they are not misaligned.  */
1680   if (TREE_CODE (ref) == TARGET_MEM_REF)
1681     return false;
1682
1683   /* The test below is basically copy of what expr.c:normal_inner_ref
1684      does to check whether the object must be loaded by parts when
1685      STRICT_ALIGNMENT is true.  */
1686   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1687                               &unsignedp, &volatilep);
1688   base_type = TREE_TYPE (base);
1689   base_align = get_object_alignment (base);
1690   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1691
1692   if (mode != BLKmode)
1693     {
1694       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1695
1696       if (base_align < mode_align
1697           || (bitpos % mode_align) != 0
1698           || (bitpos % BITS_PER_UNIT) != 0)
1699         return true;
1700
1701       if (toffset
1702           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1703         return true;
1704
1705       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1706         return true;
1707     }
1708
1709   return false;
1710 }
1711
1712 /* Return true if EXPR may be non-addressable.   */
1713
1714 bool
1715 may_be_nonaddressable_p (tree expr)
1716 {
1717   switch (TREE_CODE (expr))
1718     {
1719     case TARGET_MEM_REF:
1720       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1721          target, thus they are always addressable.  */
1722       return false;
1723
1724     case COMPONENT_REF:
1725       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1726              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1727
1728     case VIEW_CONVERT_EXPR:
1729       /* This kind of view-conversions may wrap non-addressable objects
1730          and make them look addressable.  After some processing the
1731          non-addressability may be uncovered again, causing ADDR_EXPRs
1732          of inappropriate objects to be built.  */
1733       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1734           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1735         return true;
1736
1737       /* ... fall through ... */
1738
1739     case ARRAY_REF:
1740     case ARRAY_RANGE_REF:
1741       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1742
1743     CASE_CONVERT:
1744       return true;
1745
1746     default:
1747       break;
1748     }
1749
1750   return false;
1751 }
1752
1753 /* Finds addresses in *OP_P inside STMT.  */
1754
1755 static void
1756 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1757 {
1758   tree base = *op_p, step = size_zero_node;
1759   struct iv *civ;
1760   struct ifs_ivopts_data ifs_ivopts_data;
1761
1762   /* Do not play with volatile memory references.  A bit too conservative,
1763      perhaps, but safe.  */
1764   if (gimple_has_volatile_ops (stmt))
1765     goto fail;
1766
1767   /* Ignore bitfields for now.  Not really something terribly complicated
1768      to handle.  TODO.  */
1769   if (TREE_CODE (base) == BIT_FIELD_REF)
1770     goto fail;
1771
1772   base = unshare_expr (base);
1773
1774   if (TREE_CODE (base) == TARGET_MEM_REF)
1775     {
1776       tree type = build_pointer_type (TREE_TYPE (base));
1777       tree astep;
1778
1779       if (TMR_BASE (base)
1780           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1781         {
1782           civ = get_iv (data, TMR_BASE (base));
1783           if (!civ)
1784             goto fail;
1785
1786           TMR_BASE (base) = civ->base;
1787           step = civ->step;
1788         }
1789       if (TMR_INDEX2 (base)
1790           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1791         {
1792           civ = get_iv (data, TMR_INDEX2 (base));
1793           if (!civ)
1794             goto fail;
1795
1796           TMR_INDEX2 (base) = civ->base;
1797           step = civ->step;
1798         }
1799       if (TMR_INDEX (base)
1800           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1801         {
1802           civ = get_iv (data, TMR_INDEX (base));
1803           if (!civ)
1804             goto fail;
1805
1806           TMR_INDEX (base) = civ->base;
1807           astep = civ->step;
1808
1809           if (astep)
1810             {
1811               if (TMR_STEP (base))
1812                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1813
1814               step = fold_build2 (PLUS_EXPR, type, step, astep);
1815             }
1816         }
1817
1818       if (integer_zerop (step))
1819         goto fail;
1820       base = tree_mem_ref_addr (type, base);
1821     }
1822   else
1823     {
1824       ifs_ivopts_data.ivopts_data = data;
1825       ifs_ivopts_data.stmt = stmt;
1826       ifs_ivopts_data.step = size_zero_node;
1827       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1828           || integer_zerop (ifs_ivopts_data.step))
1829         goto fail;
1830       step = ifs_ivopts_data.step;
1831
1832       /* Check that the base expression is addressable.  This needs
1833          to be done after substituting bases of IVs into it.  */
1834       if (may_be_nonaddressable_p (base))
1835         goto fail;
1836
1837       /* Moreover, on strict alignment platforms, check that it is
1838          sufficiently aligned.  */
1839       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1840         goto fail;
1841
1842       base = build_fold_addr_expr (base);
1843
1844       /* Substituting bases of IVs into the base expression might
1845          have caused folding opportunities.  */
1846       if (TREE_CODE (base) == ADDR_EXPR)
1847         {
1848           tree *ref = &TREE_OPERAND (base, 0);
1849           while (handled_component_p (*ref))
1850             ref = &TREE_OPERAND (*ref, 0);
1851           if (TREE_CODE (*ref) == MEM_REF)
1852             {
1853               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1854                                       TREE_OPERAND (*ref, 0),
1855                                       TREE_OPERAND (*ref, 1));
1856               if (tem)
1857                 *ref = tem;
1858             }
1859         }
1860     }
1861
1862   civ = alloc_iv (base, step);
1863   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1864   return;
1865
1866 fail:
1867   for_each_index (op_p, idx_record_use, data);
1868 }
1869
1870 /* Finds and records invariants used in STMT.  */
1871
1872 static void
1873 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1874 {
1875   ssa_op_iter iter;
1876   use_operand_p use_p;
1877   tree op;
1878
1879   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1880     {
1881       op = USE_FROM_PTR (use_p);
1882       record_invariant (data, op, false);
1883     }
1884 }
1885
1886 /* Finds interesting uses of induction variables in the statement STMT.  */
1887
1888 static void
1889 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1890 {
1891   struct iv *iv;
1892   tree op, *lhs, *rhs;
1893   ssa_op_iter iter;
1894   use_operand_p use_p;
1895   enum tree_code code;
1896
1897   find_invariants_stmt (data, stmt);
1898
1899   if (gimple_code (stmt) == GIMPLE_COND)
1900     {
1901       find_interesting_uses_cond (data, stmt);
1902       return;
1903     }
1904
1905   if (is_gimple_assign (stmt))
1906     {
1907       lhs = gimple_assign_lhs_ptr (stmt);
1908       rhs = gimple_assign_rhs1_ptr (stmt);
1909
1910       if (TREE_CODE (*lhs) == SSA_NAME)
1911         {
1912           /* If the statement defines an induction variable, the uses are not
1913              interesting by themselves.  */
1914
1915           iv = get_iv (data, *lhs);
1916
1917           if (iv && !integer_zerop (iv->step))
1918             return;
1919         }
1920
1921       code = gimple_assign_rhs_code (stmt);
1922       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1923           && (REFERENCE_CLASS_P (*rhs)
1924               || is_gimple_val (*rhs)))
1925         {
1926           if (REFERENCE_CLASS_P (*rhs))
1927             find_interesting_uses_address (data, stmt, rhs);
1928           else
1929             find_interesting_uses_op (data, *rhs);
1930
1931           if (REFERENCE_CLASS_P (*lhs))
1932             find_interesting_uses_address (data, stmt, lhs);
1933           return;
1934         }
1935       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1936         {
1937           find_interesting_uses_cond (data, stmt);
1938           return;
1939         }
1940
1941       /* TODO -- we should also handle address uses of type
1942
1943          memory = call (whatever);
1944
1945          and
1946
1947          call (memory).  */
1948     }
1949
1950   if (gimple_code (stmt) == GIMPLE_PHI
1951       && gimple_bb (stmt) == data->current_loop->header)
1952     {
1953       iv = get_iv (data, PHI_RESULT (stmt));
1954
1955       if (iv && !integer_zerop (iv->step))
1956         return;
1957     }
1958
1959   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1960     {
1961       op = USE_FROM_PTR (use_p);
1962
1963       if (TREE_CODE (op) != SSA_NAME)
1964         continue;
1965
1966       iv = get_iv (data, op);
1967       if (!iv)
1968         continue;
1969
1970       find_interesting_uses_op (data, op);
1971     }
1972 }
1973
1974 /* Finds interesting uses of induction variables outside of loops
1975    on loop exit edge EXIT.  */
1976
1977 static void
1978 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1979 {
1980   gimple phi;
1981   gimple_stmt_iterator psi;
1982   tree def;
1983
1984   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1985     {
1986       phi = gsi_stmt (psi);
1987       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1988       if (!virtual_operand_p (def))
1989         find_interesting_uses_op (data, def);
1990     }
1991 }
1992
1993 /* Finds uses of the induction variables that are interesting.  */
1994
1995 static void
1996 find_interesting_uses (struct ivopts_data *data)
1997 {
1998   basic_block bb;
1999   gimple_stmt_iterator bsi;
2000   basic_block *body = get_loop_body (data->current_loop);
2001   unsigned i;
2002   struct version_info *info;
2003   edge e;
2004
2005   if (dump_file && (dump_flags & TDF_DETAILS))
2006     fprintf (dump_file, "Uses:\n\n");
2007
2008   for (i = 0; i < data->current_loop->num_nodes; i++)
2009     {
2010       edge_iterator ei;
2011       bb = body[i];
2012
2013       FOR_EACH_EDGE (e, ei, bb->succs)
2014         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2015             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2016           find_interesting_uses_outside (data, e);
2017
2018       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2019         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2020       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2021         if (!is_gimple_debug (gsi_stmt (bsi)))
2022           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2023     }
2024
2025   if (dump_file && (dump_flags & TDF_DETAILS))
2026     {
2027       bitmap_iterator bi;
2028
2029       fprintf (dump_file, "\n");
2030
2031       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2032         {
2033           info = ver_info (data, i);
2034           if (info->inv_id)
2035             {
2036               fprintf (dump_file, "  ");
2037               print_generic_expr (dump_file, info->name, TDF_SLIM);
2038               fprintf (dump_file, " is invariant (%d)%s\n",
2039                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2040             }
2041         }
2042
2043       fprintf (dump_file, "\n");
2044     }
2045
2046   free (body);
2047 }
2048
2049 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2050    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2051    we are at the top-level of the processed address.  */
2052
2053 static tree
2054 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2055                 HOST_WIDE_INT *offset)
2056 {
2057   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2058   enum tree_code code;
2059   tree type, orig_type = TREE_TYPE (expr);
2060   HOST_WIDE_INT off0, off1, st;
2061   tree orig_expr = expr;
2062
2063   STRIP_NOPS (expr);
2064
2065   type = TREE_TYPE (expr);
2066   code = TREE_CODE (expr);
2067   *offset = 0;
2068
2069   switch (code)
2070     {
2071     case INTEGER_CST:
2072       if (!cst_and_fits_in_hwi (expr)
2073           || integer_zerop (expr))
2074         return orig_expr;
2075
2076       *offset = int_cst_value (expr);
2077       return build_int_cst (orig_type, 0);
2078
2079     case POINTER_PLUS_EXPR:
2080     case PLUS_EXPR:
2081     case MINUS_EXPR:
2082       op0 = TREE_OPERAND (expr, 0);
2083       op1 = TREE_OPERAND (expr, 1);
2084
2085       op0 = strip_offset_1 (op0, false, false, &off0);
2086       op1 = strip_offset_1 (op1, false, false, &off1);
2087
2088       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2089       if (op0 == TREE_OPERAND (expr, 0)
2090           && op1 == TREE_OPERAND (expr, 1))
2091         return orig_expr;
2092
2093       if (integer_zerop (op1))
2094         expr = op0;
2095       else if (integer_zerop (op0))
2096         {
2097           if (code == MINUS_EXPR)
2098             expr = fold_build1 (NEGATE_EXPR, type, op1);
2099           else
2100             expr = op1;
2101         }
2102       else
2103         expr = fold_build2 (code, type, op0, op1);
2104
2105       return fold_convert (orig_type, expr);
2106
2107     case MULT_EXPR:
2108       op1 = TREE_OPERAND (expr, 1);
2109       if (!cst_and_fits_in_hwi (op1))
2110         return orig_expr;
2111
2112       op0 = TREE_OPERAND (expr, 0);
2113       op0 = strip_offset_1 (op0, false, false, &off0);
2114       if (op0 == TREE_OPERAND (expr, 0))
2115         return orig_expr;
2116
2117       *offset = off0 * int_cst_value (op1);
2118       if (integer_zerop (op0))
2119         expr = op0;
2120       else
2121         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2122
2123       return fold_convert (orig_type, expr);
2124
2125     case ARRAY_REF:
2126     case ARRAY_RANGE_REF:
2127       if (!inside_addr)
2128         return orig_expr;
2129
2130       step = array_ref_element_size (expr);
2131       if (!cst_and_fits_in_hwi (step))
2132         break;
2133
2134       st = int_cst_value (step);
2135       op1 = TREE_OPERAND (expr, 1);
2136       op1 = strip_offset_1 (op1, false, false, &off1);
2137       *offset = off1 * st;
2138
2139       if (top_compref
2140           && integer_zerop (op1))
2141         {
2142           /* Strip the component reference completely.  */
2143           op0 = TREE_OPERAND (expr, 0);
2144           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2145           *offset += off0;
2146           return op0;
2147         }
2148       break;
2149
2150     case COMPONENT_REF:
2151       {
2152         tree field;
2153
2154         if (!inside_addr)
2155           return orig_expr;
2156
2157         tmp = component_ref_field_offset (expr);
2158         field = TREE_OPERAND (expr, 1);
2159         if (top_compref
2160             && cst_and_fits_in_hwi (tmp)
2161             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2162           {
2163             HOST_WIDE_INT boffset, abs_off;
2164
2165             /* Strip the component reference completely.  */
2166             op0 = TREE_OPERAND (expr, 0);
2167             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2168             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2169             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2170             if (boffset < 0)
2171               abs_off = -abs_off;
2172
2173             *offset = off0 + int_cst_value (tmp) + abs_off;
2174             return op0;
2175           }
2176       }
2177       break;
2178
2179     case ADDR_EXPR:
2180       op0 = TREE_OPERAND (expr, 0);
2181       op0 = strip_offset_1 (op0, true, true, &off0);
2182       *offset += off0;
2183
2184       if (op0 == TREE_OPERAND (expr, 0))
2185         return orig_expr;
2186
2187       expr = build_fold_addr_expr (op0);
2188       return fold_convert (orig_type, expr);
2189
2190     case MEM_REF:
2191       /* ???  Offset operand?  */
2192       inside_addr = false;
2193       break;
2194
2195     default:
2196       return orig_expr;
2197     }
2198
2199   /* Default handling of expressions for that we want to recurse into
2200      the first operand.  */
2201   op0 = TREE_OPERAND (expr, 0);
2202   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2203   *offset += off0;
2204
2205   if (op0 == TREE_OPERAND (expr, 0)
2206       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2207     return orig_expr;
2208
2209   expr = copy_node (expr);
2210   TREE_OPERAND (expr, 0) = op0;
2211   if (op1)
2212     TREE_OPERAND (expr, 1) = op1;
2213
2214   /* Inside address, we might strip the top level component references,
2215      thus changing type of the expression.  Handling of ADDR_EXPR
2216      will fix that.  */
2217   expr = fold_convert (orig_type, expr);
2218
2219   return expr;
2220 }
2221
2222 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2223
2224 static tree
2225 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2226 {
2227   HOST_WIDE_INT off;
2228   tree core = strip_offset_1 (expr, false, false, &off);
2229   *offset = off;
2230   return core;
2231 }
2232
2233 /* Returns variant of TYPE that can be used as base for different uses.
2234    We return unsigned type with the same precision, which avoids problems
2235    with overflows.  */
2236
2237 static tree
2238 generic_type_for (tree type)
2239 {
2240   if (POINTER_TYPE_P (type))
2241     return unsigned_type_for (type);
2242
2243   if (TYPE_UNSIGNED (type))
2244     return type;
2245
2246   return unsigned_type_for (type);
2247 }
2248
2249 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2250    the bitmap to that we should store it.  */
2251
2252 static struct ivopts_data *fd_ivopts_data;
2253 static tree
2254 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2255 {
2256   bitmap *depends_on = (bitmap *) data;
2257   struct version_info *info;
2258
2259   if (TREE_CODE (*expr_p) != SSA_NAME)
2260     return NULL_TREE;
2261   info = name_info (fd_ivopts_data, *expr_p);
2262
2263   if (!info->inv_id || info->has_nonlin_use)
2264     return NULL_TREE;
2265
2266   if (!*depends_on)
2267     *depends_on = BITMAP_ALLOC (NULL);
2268   bitmap_set_bit (*depends_on, info->inv_id);
2269
2270   return NULL_TREE;
2271 }
2272
2273 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2274    position to POS.  If USE is not NULL, the candidate is set as related to
2275    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2276    replacement of the final value of the iv by a direct computation.  */
2277
2278 static struct iv_cand *
2279 add_candidate_1 (struct ivopts_data *data,
2280                  tree base, tree step, bool important, enum iv_position pos,
2281                  struct iv_use *use, gimple incremented_at)
2282 {
2283   unsigned i;
2284   struct iv_cand *cand = NULL;
2285   tree type, orig_type;
2286
2287   /* For non-original variables, make sure their values are computed in a type
2288      that does not invoke undefined behavior on overflows (since in general,
2289      we cannot prove that these induction variables are non-wrapping).  */
2290   if (pos != IP_ORIGINAL)
2291     {
2292       orig_type = TREE_TYPE (base);
2293       type = generic_type_for (orig_type);
2294       if (type != orig_type)
2295         {
2296           base = fold_convert (type, base);
2297           step = fold_convert (type, step);
2298         }
2299     }
2300
2301   for (i = 0; i < n_iv_cands (data); i++)
2302     {
2303       cand = iv_cand (data, i);
2304
2305       if (cand->pos != pos)
2306         continue;
2307
2308       if (cand->incremented_at != incremented_at
2309           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2310               && cand->ainc_use != use))
2311         continue;
2312
2313       if (!cand->iv)
2314         {
2315           if (!base && !step)
2316             break;
2317
2318           continue;
2319         }
2320
2321       if (!base && !step)
2322         continue;
2323
2324       if (operand_equal_p (base, cand->iv->base, 0)
2325           && operand_equal_p (step, cand->iv->step, 0)
2326           && (TYPE_PRECISION (TREE_TYPE (base))
2327               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2328         break;
2329     }
2330
2331   if (i == n_iv_cands (data))
2332     {
2333       cand = XCNEW (struct iv_cand);
2334       cand->id = i;
2335
2336       if (!base && !step)
2337         cand->iv = NULL;
2338       else
2339         cand->iv = alloc_iv (base, step);
2340
2341       cand->pos = pos;
2342       if (pos != IP_ORIGINAL && cand->iv)
2343         {
2344           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2345           cand->var_after = cand->var_before;
2346         }
2347       cand->important = important;
2348       cand->incremented_at = incremented_at;
2349       data->iv_candidates.safe_push (cand);
2350
2351       if (step
2352           && TREE_CODE (step) != INTEGER_CST)
2353         {
2354           fd_ivopts_data = data;
2355           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2356         }
2357
2358       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2359         cand->ainc_use = use;
2360       else
2361         cand->ainc_use = NULL;
2362
2363       if (dump_file && (dump_flags & TDF_DETAILS))
2364         dump_cand (dump_file, cand);
2365     }
2366
2367   if (important && !cand->important)
2368     {
2369       cand->important = true;
2370       if (dump_file && (dump_flags & TDF_DETAILS))
2371         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2372     }
2373
2374   if (use)
2375     {
2376       bitmap_set_bit (use->related_cands, i);
2377       if (dump_file && (dump_flags & TDF_DETAILS))
2378         fprintf (dump_file, "Candidate %d is related to use %d\n",
2379                  cand->id, use->id);
2380     }
2381
2382   return cand;
2383 }
2384
2385 /* Returns true if incrementing the induction variable at the end of the LOOP
2386    is allowed.
2387
2388    The purpose is to avoid splitting latch edge with a biv increment, thus
2389    creating a jump, possibly confusing other optimization passes and leaving
2390    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2391    is not available (so we do not have a better alternative), or if the latch
2392    edge is already nonempty.  */
2393
2394 static bool
2395 allow_ip_end_pos_p (struct loop *loop)
2396 {
2397   if (!ip_normal_pos (loop))
2398     return true;
2399
2400   if (!empty_block_p (ip_end_pos (loop)))
2401     return true;
2402
2403   return false;
2404 }
2405
2406 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2407    Important field is set to IMPORTANT.  */
2408
2409 static void
2410 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2411                         bool important, struct iv_use *use)
2412 {
2413   basic_block use_bb = gimple_bb (use->stmt);
2414   enum machine_mode mem_mode;
2415   unsigned HOST_WIDE_INT cstepi;
2416
2417   /* If we insert the increment in any position other than the standard
2418      ones, we must ensure that it is incremented once per iteration.
2419      It must not be in an inner nested loop, or one side of an if
2420      statement.  */
2421   if (use_bb->loop_father != data->current_loop
2422       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2423       || stmt_could_throw_p (use->stmt)
2424       || !cst_and_fits_in_hwi (step))
2425     return;
2426
2427   cstepi = int_cst_value (step);
2428
2429   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2431         || USE_STORE_PRE_INCREMENT (mem_mode))
2432        && GET_MODE_SIZE (mem_mode) == cstepi)
2433       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2434            || USE_STORE_PRE_DECREMENT (mem_mode))
2435           && GET_MODE_SIZE (mem_mode) == -cstepi))
2436     {
2437       enum tree_code code = MINUS_EXPR;
2438       tree new_base;
2439       tree new_step = step;
2440
2441       if (POINTER_TYPE_P (TREE_TYPE (base)))
2442         {
2443           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2444           code = POINTER_PLUS_EXPR;
2445         }
2446       else
2447         new_step = fold_convert (TREE_TYPE (base), new_step);
2448       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2449       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2450                        use->stmt);
2451     }
2452   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2453         || USE_STORE_POST_INCREMENT (mem_mode))
2454        && GET_MODE_SIZE (mem_mode) == cstepi)
2455       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2456            || USE_STORE_POST_DECREMENT (mem_mode))
2457           && GET_MODE_SIZE (mem_mode) == -cstepi))
2458     {
2459       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2460                        use->stmt);
2461     }
2462 }
2463
2464 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2465    position to POS.  If USE is not NULL, the candidate is set as related to
2466    it.  The candidate computation is scheduled on all available positions.  */
2467
2468 static void
2469 add_candidate (struct ivopts_data *data,
2470                tree base, tree step, bool important, struct iv_use *use)
2471 {
2472   if (ip_normal_pos (data->current_loop))
2473     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2474   if (ip_end_pos (data->current_loop)
2475       && allow_ip_end_pos_p (data->current_loop))
2476     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2477
2478   if (use != NULL && use->type == USE_ADDRESS)
2479     add_autoinc_candidates (data, base, step, important, use);
2480 }
2481
2482 /* Adds standard iv candidates.  */
2483
2484 static void
2485 add_standard_iv_candidates (struct ivopts_data *data)
2486 {
2487   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2488
2489   /* The same for a double-integer type if it is still fast enough.  */
2490   if (TYPE_PRECISION
2491         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2492       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2493     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2494                    build_int_cst (long_integer_type_node, 1), true, NULL);
2495
2496   /* The same for a double-integer type if it is still fast enough.  */
2497   if (TYPE_PRECISION
2498         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2499       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2500     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2501                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2502 }
2503
2504
2505 /* Adds candidates bases on the old induction variable IV.  */
2506
2507 static void
2508 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2509 {
2510   gimple phi;
2511   tree def;
2512   struct iv_cand *cand;
2513
2514   add_candidate (data, iv->base, iv->step, true, NULL);
2515
2516   /* The same, but with initial value zero.  */
2517   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2518     add_candidate (data, size_int (0), iv->step, true, NULL);
2519   else
2520     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2521                    iv->step, true, NULL);
2522
2523   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2524   if (gimple_code (phi) == GIMPLE_PHI)
2525     {
2526       /* Additionally record the possibility of leaving the original iv
2527          untouched.  */
2528       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2529       cand = add_candidate_1 (data,
2530                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2531                               SSA_NAME_DEF_STMT (def));
2532       cand->var_before = iv->ssa_name;
2533       cand->var_after = def;
2534     }
2535 }
2536
2537 /* Adds candidates based on the old induction variables.  */
2538
2539 static void
2540 add_old_ivs_candidates (struct ivopts_data *data)
2541 {
2542   unsigned i;
2543   struct iv *iv;
2544   bitmap_iterator bi;
2545
2546   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2547     {
2548       iv = ver_info (data, i)->iv;
2549       if (iv && iv->biv_p && !integer_zerop (iv->step))
2550         add_old_iv_candidates (data, iv);
2551     }
2552 }
2553
2554 /* Adds candidates based on the value of the induction variable IV and USE.  */
2555
2556 static void
2557 add_iv_value_candidates (struct ivopts_data *data,
2558                          struct iv *iv, struct iv_use *use)
2559 {
2560   unsigned HOST_WIDE_INT offset;
2561   tree base;
2562   tree basetype;
2563
2564   add_candidate (data, iv->base, iv->step, false, use);
2565
2566   /* The same, but with initial value zero.  Make such variable important,
2567      since it is generic enough so that possibly many uses may be based
2568      on it.  */
2569   basetype = TREE_TYPE (iv->base);
2570   if (POINTER_TYPE_P (basetype))
2571     basetype = sizetype;
2572   add_candidate (data, build_int_cst (basetype, 0),
2573                  iv->step, true, use);
2574
2575   /* Third, try removing the constant offset.  Make sure to even
2576      add a candidate for &a[0] vs. (T *)&a.  */
2577   base = strip_offset (iv->base, &offset);
2578   if (offset
2579       || base != iv->base)
2580     add_candidate (data, base, iv->step, false, use);
2581 }
2582
2583 /* Adds candidates based on the uses.  */
2584
2585 static void
2586 add_derived_ivs_candidates (struct ivopts_data *data)
2587 {
2588   unsigned i;
2589
2590   for (i = 0; i < n_iv_uses (data); i++)
2591     {
2592       struct iv_use *use = iv_use (data, i);
2593
2594       if (!use)
2595         continue;
2596
2597       switch (use->type)
2598         {
2599         case USE_NONLINEAR_EXPR:
2600         case USE_COMPARE:
2601         case USE_ADDRESS:
2602           /* Just add the ivs based on the value of the iv used here.  */
2603           add_iv_value_candidates (data, use->iv, use);
2604           break;
2605
2606         default:
2607           gcc_unreachable ();
2608         }
2609     }
2610 }
2611
2612 /* Record important candidates and add them to related_cands bitmaps
2613    if needed.  */
2614
2615 static void
2616 record_important_candidates (struct ivopts_data *data)
2617 {
2618   unsigned i;
2619   struct iv_use *use;
2620
2621   for (i = 0; i < n_iv_cands (data); i++)
2622     {
2623       struct iv_cand *cand = iv_cand (data, i);
2624
2625       if (cand->important)
2626         bitmap_set_bit (data->important_candidates, i);
2627     }
2628
2629   data->consider_all_candidates = (n_iv_cands (data)
2630                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2631
2632   if (data->consider_all_candidates)
2633     {
2634       /* We will not need "related_cands" bitmaps in this case,
2635          so release them to decrease peak memory consumption.  */
2636       for (i = 0; i < n_iv_uses (data); i++)
2637         {
2638           use = iv_use (data, i);
2639           BITMAP_FREE (use->related_cands);
2640         }
2641     }
2642   else
2643     {
2644       /* Add important candidates to the related_cands bitmaps.  */
2645       for (i = 0; i < n_iv_uses (data); i++)
2646         bitmap_ior_into (iv_use (data, i)->related_cands,
2647                          data->important_candidates);
2648     }
2649 }
2650
2651 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2652    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2653    we allocate a simple list to every use.  */
2654
2655 static void
2656 alloc_use_cost_map (struct ivopts_data *data)
2657 {
2658   unsigned i, size, s;
2659
2660   for (i = 0; i < n_iv_uses (data); i++)
2661     {
2662       struct iv_use *use = iv_use (data, i);
2663
2664       if (data->consider_all_candidates)
2665         size = n_iv_cands (data);
2666       else
2667         {
2668           s = bitmap_count_bits (use->related_cands);
2669
2670           /* Round up to the power of two, so that moduling by it is fast.  */
2671           size = s ? (1 << ceil_log2 (s)) : 1;
2672         }
2673
2674       use->n_map_members = size;
2675       use->cost_map = XCNEWVEC (struct cost_pair, size);
2676     }
2677 }
2678
2679 /* Returns description of computation cost of expression whose runtime
2680    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2681
2682 static comp_cost
2683 new_cost (unsigned runtime, unsigned complexity)
2684 {
2685   comp_cost cost;
2686
2687   cost.cost = runtime;
2688   cost.complexity = complexity;
2689
2690   return cost;
2691 }
2692
2693 /* Adds costs COST1 and COST2.  */
2694
2695 static comp_cost
2696 add_costs (comp_cost cost1, comp_cost cost2)
2697 {
2698   cost1.cost += cost2.cost;
2699   cost1.complexity += cost2.complexity;
2700
2701   return cost1;
2702 }
2703 /* Subtracts costs COST1 and COST2.  */
2704
2705 static comp_cost
2706 sub_costs (comp_cost cost1, comp_cost cost2)
2707 {
2708   cost1.cost -= cost2.cost;
2709   cost1.complexity -= cost2.complexity;
2710
2711   return cost1;
2712 }
2713
2714 /* Returns a negative number if COST1 < COST2, a positive number if
2715    COST1 > COST2, and 0 if COST1 = COST2.  */
2716
2717 static int
2718 compare_costs (comp_cost cost1, comp_cost cost2)
2719 {
2720   if (cost1.cost == cost2.cost)
2721     return cost1.complexity - cost2.complexity;
2722
2723   return cost1.cost - cost2.cost;
2724 }
2725
2726 /* Returns true if COST is infinite.  */
2727
2728 static bool
2729 infinite_cost_p (comp_cost cost)
2730 {
2731   return cost.cost == INFTY;
2732 }
2733
2734 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2735    on invariants DEPENDS_ON and that the value used in expressing it
2736    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2737
2738 static void
2739 set_use_iv_cost (struct ivopts_data *data,
2740                  struct iv_use *use, struct iv_cand *cand,
2741                  comp_cost cost, bitmap depends_on, tree value,
2742                  enum tree_code comp, int inv_expr_id)
2743 {
2744   unsigned i, s;
2745
2746   if (infinite_cost_p (cost))
2747     {
2748       BITMAP_FREE (depends_on);
2749       return;
2750     }
2751
2752   if (data->consider_all_candidates)
2753     {
2754       use->cost_map[cand->id].cand = cand;
2755       use->cost_map[cand->id].cost = cost;
2756       use->cost_map[cand->id].depends_on = depends_on;
2757       use->cost_map[cand->id].value = value;
2758       use->cost_map[cand->id].comp = comp;
2759       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2760       return;
2761     }
2762
2763   /* n_map_members is a power of two, so this computes modulo.  */
2764   s = cand->id & (use->n_map_members - 1);
2765   for (i = s; i < use->n_map_members; i++)
2766     if (!use->cost_map[i].cand)
2767       goto found;
2768   for (i = 0; i < s; i++)
2769     if (!use->cost_map[i].cand)
2770       goto found;
2771
2772   gcc_unreachable ();
2773
2774 found:
2775   use->cost_map[i].cand = cand;
2776   use->cost_map[i].cost = cost;
2777   use->cost_map[i].depends_on = depends_on;
2778   use->cost_map[i].value = value;
2779   use->cost_map[i].comp = comp;
2780   use->cost_map[i].inv_expr_id = inv_expr_id;
2781 }
2782
2783 /* Gets cost of (USE, CANDIDATE) pair.  */
2784
2785 static struct cost_pair *
2786 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2787                  struct iv_cand *cand)
2788 {
2789   unsigned i, s;
2790   struct cost_pair *ret;
2791
2792   if (!cand)
2793     return NULL;
2794
2795   if (data->consider_all_candidates)
2796     {
2797       ret = use->cost_map + cand->id;
2798       if (!ret->cand)
2799         return NULL;
2800
2801       return ret;
2802     }
2803
2804   /* n_map_members is a power of two, so this computes modulo.  */
2805   s = cand->id & (use->n_map_members - 1);
2806   for (i = s; i < use->n_map_members; i++)
2807     if (use->cost_map[i].cand == cand)
2808       return use->cost_map + i;
2809     else if (use->cost_map[i].cand == NULL)
2810       return NULL;
2811   for (i = 0; i < s; i++)
2812     if (use->cost_map[i].cand == cand)
2813       return use->cost_map + i;
2814     else if (use->cost_map[i].cand == NULL)
2815       return NULL;
2816
2817   return NULL;
2818 }
2819
2820 /* Returns estimate on cost of computing SEQ.  */
2821
2822 static unsigned
2823 seq_cost (rtx seq, bool speed)
2824 {
2825   unsigned cost = 0;
2826   rtx set;
2827
2828   for (; seq; seq = NEXT_INSN (seq))
2829     {
2830       set = single_set (seq);
2831       if (set)
2832         cost += set_src_cost (SET_SRC (set), speed);
2833       else
2834         cost++;
2835     }
2836
2837   return cost;
2838 }
2839
2840 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2841 static rtx
2842 produce_memory_decl_rtl (tree obj, int *regno)
2843 {
2844   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2845   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2846   rtx x;
2847
2848   gcc_assert (obj);
2849   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2850     {
2851       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2852       x = gen_rtx_SYMBOL_REF (address_mode, name);
2853       SET_SYMBOL_REF_DECL (x, obj);
2854       x = gen_rtx_MEM (DECL_MODE (obj), x);
2855       set_mem_addr_space (x, as);
2856       targetm.encode_section_info (obj, x, true);
2857     }
2858   else
2859     {
2860       x = gen_raw_REG (address_mode, (*regno)++);
2861       x = gen_rtx_MEM (DECL_MODE (obj), x);
2862       set_mem_addr_space (x, as);
2863     }
2864
2865   return x;
2866 }
2867
2868 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2869    walk_tree.  DATA contains the actual fake register number.  */
2870
2871 static tree
2872 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2873 {
2874   tree obj = NULL_TREE;
2875   rtx x = NULL_RTX;
2876   int *regno = (int *) data;
2877
2878   switch (TREE_CODE (*expr_p))
2879     {
2880     case ADDR_EXPR:
2881       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2882            handled_component_p (*expr_p);
2883            expr_p = &TREE_OPERAND (*expr_p, 0))
2884         continue;
2885       obj = *expr_p;
2886       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2887         x = produce_memory_decl_rtl (obj, regno);
2888       break;
2889
2890     case SSA_NAME:
2891       *ws = 0;
2892       obj = SSA_NAME_VAR (*expr_p);
2893       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2894       if (!obj)
2895         return NULL_TREE;
2896       if (!DECL_RTL_SET_P (obj))
2897         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2898       break;
2899
2900     case VAR_DECL:
2901     case PARM_DECL:
2902     case RESULT_DECL:
2903       *ws = 0;
2904       obj = *expr_p;
2905
2906       if (DECL_RTL_SET_P (obj))
2907         break;
2908
2909       if (DECL_MODE (obj) == BLKmode)
2910         x = produce_memory_decl_rtl (obj, regno);
2911       else
2912         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2913
2914       break;
2915
2916     default:
2917       break;
2918     }
2919
2920   if (x)
2921     {
2922       decl_rtl_to_reset.safe_push (obj);
2923       SET_DECL_RTL (obj, x);
2924     }
2925
2926   return NULL_TREE;
2927 }
2928
2929 /* Determines cost of the computation of EXPR.  */
2930
2931 static unsigned
2932 computation_cost (tree expr, bool speed)
2933 {
2934   rtx seq, rslt;
2935   tree type = TREE_TYPE (expr);
2936   unsigned cost;
2937   /* Avoid using hard regs in ways which may be unsupported.  */
2938   int regno = LAST_VIRTUAL_REGISTER + 1;
2939   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2940   enum node_frequency real_frequency = node->frequency;
2941
2942   node->frequency = NODE_FREQUENCY_NORMAL;
2943   crtl->maybe_hot_insn_p = speed;
2944   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2945   start_sequence ();
2946   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2947   seq = get_insns ();
2948   end_sequence ();
2949   default_rtl_profile ();
2950   node->frequency = real_frequency;
2951
2952   cost = seq_cost (seq, speed);
2953   if (MEM_P (rslt))
2954     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2955                           TYPE_ADDR_SPACE (type), speed);
2956   else if (!REG_P (rslt))
2957     cost += set_src_cost (rslt, speed);
2958
2959   return cost;
2960 }
2961
2962 /* Returns variable containing the value of candidate CAND at statement AT.  */
2963
2964 static tree
2965 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2966 {
2967   if (stmt_after_increment (loop, cand, stmt))
2968     return cand->var_after;
2969   else
2970     return cand->var_before;
2971 }
2972
2973 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2974    same precision that is at least as wide as the precision of TYPE, stores
2975    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2976    type of A and B.  */
2977
2978 static tree
2979 determine_common_wider_type (tree *a, tree *b)
2980 {
2981   tree wider_type = NULL;
2982   tree suba, subb;
2983   tree atype = TREE_TYPE (*a);
2984
2985   if (CONVERT_EXPR_P (*a))
2986     {
2987       suba = TREE_OPERAND (*a, 0);
2988       wider_type = TREE_TYPE (suba);
2989       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2990         return atype;
2991     }
2992   else
2993     return atype;
2994
2995   if (CONVERT_EXPR_P (*b))
2996     {
2997       subb = TREE_OPERAND (*b, 0);
2998       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2999         return atype;
3000     }
3001   else
3002     return atype;
3003
3004   *a = suba;
3005   *b = subb;
3006   return wider_type;
3007 }
3008
3009 /* Determines the expression by that USE is expressed from induction variable
3010    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3011    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3012
3013 static bool
3014 get_computation_aff (struct loop *loop,
3015                      struct iv_use *use, struct iv_cand *cand, gimple at,
3016                      struct affine_tree_combination *aff)
3017 {
3018   tree ubase = use->iv->base;
3019   tree ustep = use->iv->step;
3020   tree cbase = cand->iv->base;
3021   tree cstep = cand->iv->step, cstep_common;
3022   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3023   tree common_type, var;
3024   tree uutype;
3025   aff_tree cbase_aff, var_aff;
3026   double_int rat;
3027
3028   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3029     {
3030       /* We do not have a precision to express the values of use.  */
3031       return false;
3032     }
3033
3034   var = var_at_stmt (loop, cand, at);
3035   uutype = unsigned_type_for (utype);
3036
3037   /* If the conversion is not noop, perform it.  */
3038   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3039     {
3040       cstep = fold_convert (uutype, cstep);
3041       cbase = fold_convert (uutype, cbase);
3042       var = fold_convert (uutype, var);
3043     }
3044
3045   if (!constant_multiple_of (ustep, cstep, &rat))
3046     return false;
3047
3048   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3049      type, we achieve better folding by computing their difference in this
3050      wider type, and cast the result to UUTYPE.  We do not need to worry about
3051      overflows, as all the arithmetics will in the end be performed in UUTYPE
3052      anyway.  */
3053   common_type = determine_common_wider_type (&ubase, &cbase);
3054
3055   /* use = ubase - ratio * cbase + ratio * var.  */
3056   tree_to_aff_combination (ubase, common_type, aff);
3057   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3058   tree_to_aff_combination (var, uutype, &var_aff);
3059
3060   /* We need to shift the value if we are after the increment.  */
3061   if (stmt_after_increment (loop, cand, at))
3062     {
3063       aff_tree cstep_aff;
3064
3065       if (common_type != uutype)
3066         cstep_common = fold_convert (common_type, cstep);
3067       else
3068         cstep_common = cstep;
3069
3070       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3071       aff_combination_add (&cbase_aff, &cstep_aff);
3072     }
3073
3074   aff_combination_scale (&cbase_aff, -rat);
3075   aff_combination_add (aff, &cbase_aff);
3076   if (common_type != uutype)
3077     aff_combination_convert (aff, uutype);
3078
3079   aff_combination_scale (&var_aff, rat);
3080   aff_combination_add (aff, &var_aff);
3081
3082   return true;
3083 }
3084
3085 /* Return the type of USE.  */
3086
3087 static tree
3088 get_use_type (struct iv_use *use)
3089 {
3090   tree base_type = TREE_TYPE (use->iv->base);
3091   tree type;
3092
3093   if (use->type == USE_ADDRESS)
3094     {
3095       /* The base_type may be a void pointer.  Create a pointer type based on
3096          the mem_ref instead.  */
3097       type = build_pointer_type (TREE_TYPE (*use->op_p));
3098       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3099                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3100     }
3101   else
3102     type = base_type;
3103
3104   return type;
3105 }
3106
3107 /* Determines the expression by that USE is expressed from induction variable
3108    CAND at statement AT in LOOP.  The computation is unshared.  */
3109
3110 static tree
3111 get_computation_at (struct loop *loop,
3112                     struct iv_use *use, struct iv_cand *cand, gimple at)
3113 {
3114   aff_tree aff;
3115   tree type = get_use_type (use);
3116
3117   if (!get_computation_aff (loop, use, cand, at, &aff))
3118     return NULL_TREE;
3119   unshare_aff_combination (&aff);
3120   return fold_convert (type, aff_combination_to_tree (&aff));
3121 }
3122
3123 /* Determines the expression by that USE is expressed from induction variable
3124    CAND in LOOP.  The computation is unshared.  */
3125
3126 static tree
3127 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3128 {
3129   return get_computation_at (loop, use, cand, use->stmt);
3130 }
3131
3132 /* Adjust the cost COST for being in loop setup rather than loop body.
3133    If we're optimizing for space, the loop setup overhead is constant;
3134    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3135 static unsigned
3136 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3137 {
3138   if (cost == INFTY)
3139     return cost;
3140   else if (optimize_loop_for_speed_p (data->current_loop))
3141     return cost / avg_loop_niter (data->current_loop);
3142   else
3143     return cost;
3144 }
3145
3146 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3147    validity for a memory reference accessing memory of mode MODE in
3148    address space AS.  */
3149
3150
3151 bool
3152 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3153                                  addr_space_t as)
3154 {
3155 #define MAX_RATIO 128
3156   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3157   static vec<sbitmap> valid_mult_list;
3158   sbitmap valid_mult;
3159
3160   if (data_index >= valid_mult_list.length ())
3161     valid_mult_list.safe_grow_cleared (data_index + 1);
3162
3163   valid_mult = valid_mult_list[data_index];
3164   if (!valid_mult)
3165     {
3166       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3167       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3168       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3169       rtx addr, scaled;
3170       HOST_WIDE_INT i;
3171
3172       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3173       bitmap_clear (valid_mult);
3174       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3175       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3176       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3177         {
3178           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3179           if (memory_address_addr_space_p (mode, addr, as)
3180               || memory_address_addr_space_p (mode, scaled, as))
3181             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3182         }
3183
3184       if (dump_file && (dump_flags & TDF_DETAILS))
3185         {
3186           fprintf (dump_file, "  allowed multipliers:");
3187           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3188             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3189               fprintf (dump_file, " %d", (int) i);
3190           fprintf (dump_file, "\n");
3191           fprintf (dump_file, "\n");
3192         }
3193
3194       valid_mult_list[data_index] = valid_mult;
3195     }
3196
3197   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3198     return false;
3199
3200   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3201 }
3202
3203 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3204    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3205    variable is omitted.  Compute the cost for a memory reference that accesses
3206    a memory location of mode MEM_MODE in address space AS.
3207
3208    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3209    size of MEM_MODE / RATIO) is available.  To make this determination, we
3210    look at the size of the increment to be made, which is given in CSTEP.
3211    CSTEP may be zero if the step is unknown.
3212    STMT_AFTER_INC is true iff the statement we're looking at is after the
3213    increment of the original biv.
3214
3215    TODO -- there must be some better way.  This all is quite crude.  */
3216
3217 enum ainc_type
3218 {
3219   AINC_PRE_INC,         /* Pre increment.  */
3220   AINC_PRE_DEC,         /* Pre decrement.  */
3221   AINC_POST_INC,        /* Post increment.  */
3222   AINC_POST_DEC,        /* Post decrement.  */
3223   AINC_NONE             /* Also the number of auto increment types.  */
3224 };
3225
3226 typedef struct address_cost_data_s
3227 {
3228   HOST_WIDE_INT min_offset, max_offset;
3229   unsigned costs[2][2][2][2];
3230   unsigned ainc_costs[AINC_NONE];
3231 } *address_cost_data;
3232
3233
3234 static comp_cost
3235 get_address_cost (bool symbol_present, bool var_present,
3236                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3237                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3238                   addr_space_t as, bool speed,
3239                   bool stmt_after_inc, bool *may_autoinc)
3240 {
3241   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3242   static vec<address_cost_data> address_cost_data_list;
3243   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3244   address_cost_data data;
3245   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3246   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3247   unsigned cost, acost, complexity;
3248   enum ainc_type autoinc_type;
3249   bool offset_p, ratio_p, autoinc;
3250   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3251   unsigned HOST_WIDE_INT mask;
3252   unsigned bits;
3253
3254   if (data_index >= address_cost_data_list.length ())
3255     address_cost_data_list.safe_grow_cleared (data_index + 1);
3256
3257   data = address_cost_data_list[data_index];
3258   if (!data)
3259     {
3260       HOST_WIDE_INT i;
3261       HOST_WIDE_INT rat, off = 0;
3262       int old_cse_not_expected, width;
3263       unsigned sym_p, var_p, off_p, rat_p, add_c;
3264       rtx seq, addr, base;
3265       rtx reg0, reg1;
3266
3267       data = (address_cost_data) xcalloc (1, sizeof (*data));
3268
3269       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3270
3271       width = GET_MODE_BITSIZE (address_mode) - 1;
3272       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3273         width = HOST_BITS_PER_WIDE_INT - 1;
3274       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3275
3276       for (i = width; i >= 0; i--)
3277         {
3278           off = -((unsigned HOST_WIDE_INT) 1 << i);
3279           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3280           if (memory_address_addr_space_p (mem_mode, addr, as))
3281             break;
3282         }
3283       data->min_offset = (i == -1? 0 : off);
3284
3285       for (i = width; i >= 0; i--)
3286         {
3287           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3288           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3289           if (memory_address_addr_space_p (mem_mode, addr, as))
3290             break;
3291         }
3292       if (i == -1)
3293         off = 0;
3294       data->max_offset = off;
3295
3296       if (dump_file && (dump_flags & TDF_DETAILS))
3297         {
3298           fprintf (dump_file, "get_address_cost:\n");
3299           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3300                    GET_MODE_NAME (mem_mode),
3301                    data->min_offset);
3302           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3303                    GET_MODE_NAME (mem_mode),
3304                    data->max_offset);
3305         }
3306
3307       rat = 1;
3308       for (i = 2; i <= MAX_RATIO; i++)
3309         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3310           {
3311             rat = i;
3312             break;
3313           }
3314
3315       /* Compute the cost of various addressing modes.  */
3316       acost = 0;
3317       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3318       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3319
3320       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3321           || USE_STORE_PRE_DECREMENT (mem_mode))
3322         {
3323           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3324           has_predec[mem_mode]
3325             = memory_address_addr_space_p (mem_mode, addr, as);
3326
3327           if (has_predec[mem_mode])
3328             data->ainc_costs[AINC_PRE_DEC]
3329               = address_cost (addr, mem_mode, as, speed);
3330         }
3331       if (USE_LOAD_POST_DECREMENT (mem_mode)
3332           || USE_STORE_POST_DECREMENT (mem_mode))
3333         {
3334           addr = gen_rtx_POST_DEC (address_mode, reg0);
3335           has_postdec[mem_mode]
3336             = memory_address_addr_space_p (mem_mode, addr, as);
3337
3338           if (has_postdec[mem_mode])
3339             data->ainc_costs[AINC_POST_DEC]
3340               = address_cost (addr, mem_mode, as, speed);
3341         }
3342       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3343           || USE_STORE_PRE_DECREMENT (mem_mode))
3344         {
3345           addr = gen_rtx_PRE_INC (address_mode, reg0);
3346           has_preinc[mem_mode]
3347             = memory_address_addr_space_p (mem_mode, addr, as);
3348
3349           if (has_preinc[mem_mode])
3350             data->ainc_costs[AINC_PRE_INC]
3351               = address_cost (addr, mem_mode, as, speed);
3352         }
3353       if (USE_LOAD_POST_INCREMENT (mem_mode)
3354           || USE_STORE_POST_INCREMENT (mem_mode))
3355         {
3356           addr = gen_rtx_POST_INC (address_mode, reg0);
3357           has_postinc[mem_mode]
3358             = memory_address_addr_space_p (mem_mode, addr, as);
3359
3360           if (has_postinc[mem_mode])
3361             data->ainc_costs[AINC_POST_INC]
3362               = address_cost (addr, mem_mode, as, speed);
3363         }
3364       for (i = 0; i < 16; i++)
3365         {
3366           sym_p = i & 1;
3367           var_p = (i >> 1) & 1;
3368           off_p = (i >> 2) & 1;
3369           rat_p = (i >> 3) & 1;
3370
3371           addr = reg0;
3372           if (rat_p)
3373             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3374                                    gen_int_mode (rat, address_mode));
3375
3376           if (var_p)
3377             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3378
3379           if (sym_p)
3380             {
3381               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3382               /* ??? We can run into trouble with some backends by presenting
3383                  it with symbols which haven't been properly passed through
3384                  targetm.encode_section_info.  By setting the local bit, we
3385                  enhance the probability of things working.  */
3386               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3387
3388               if (off_p)
3389                 base = gen_rtx_fmt_e (CONST, address_mode,
3390                                       gen_rtx_fmt_ee
3391                                         (PLUS, address_mode, base,
3392                                          gen_int_mode (off, address_mode)));
3393             }
3394           else if (off_p)
3395             base = gen_int_mode (off, address_mode);
3396           else
3397             base = NULL_RTX;
3398
3399           if (base)
3400             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3401
3402           start_sequence ();
3403           /* To avoid splitting addressing modes, pretend that no cse will
3404              follow.  */
3405           old_cse_not_expected = cse_not_expected;
3406           cse_not_expected = true;
3407           addr = memory_address_addr_space (mem_mode, addr, as);
3408           cse_not_expected = old_cse_not_expected;
3409           seq = get_insns ();
3410           end_sequence ();
3411
3412           acost = seq_cost (seq, speed);
3413           acost += address_cost (addr, mem_mode, as, speed);
3414
3415           if (!acost)
3416             acost = 1;
3417           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3418         }
3419
3420       /* On some targets, it is quite expensive to load symbol to a register,
3421          which makes addresses that contain symbols look much more expensive.
3422          However, the symbol will have to be loaded in any case before the
3423          loop (and quite likely we have it in register already), so it does not
3424          make much sense to penalize them too heavily.  So make some final
3425          tweaks for the SYMBOL_PRESENT modes:
3426
3427          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3428          var is cheaper, use this mode with small penalty.
3429          If VAR_PRESENT is true, try whether the mode with
3430          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3431          if this is the case, use it.  */
3432       add_c = add_cost (speed, address_mode);
3433       for (i = 0; i < 8; i++)
3434         {
3435           var_p = i & 1;
3436           off_p = (i >> 1) & 1;
3437           rat_p = (i >> 2) & 1;
3438
3439           acost = data->costs[0][1][off_p][rat_p] + 1;
3440           if (var_p)
3441             acost += add_c;
3442
3443           if (acost < data->costs[1][var_p][off_p][rat_p])
3444             data->costs[1][var_p][off_p][rat_p] = acost;
3445         }
3446
3447       if (dump_file && (dump_flags & TDF_DETAILS))
3448         {
3449           fprintf (dump_file, "Address costs:\n");
3450
3451           for (i = 0; i < 16; i++)
3452             {
3453               sym_p = i & 1;
3454               var_p = (i >> 1) & 1;
3455               off_p = (i >> 2) & 1;
3456               rat_p = (i >> 3) & 1;
3457
3458               fprintf (dump_file, "  ");
3459               if (sym_p)
3460                 fprintf (dump_file, "sym + ");
3461               if (var_p)
3462                 fprintf (dump_file, "var + ");
3463               if (off_p)
3464                 fprintf (dump_file, "cst + ");
3465               if (rat_p)
3466                 fprintf (dump_file, "rat * ");
3467
3468               acost = data->costs[sym_p][var_p][off_p][rat_p];
3469               fprintf (dump_file, "index costs %d\n", acost);
3470             }
3471           if (has_predec[mem_mode] || has_postdec[mem_mode]
3472               || has_preinc[mem_mode] || has_postinc[mem_mode])
3473             fprintf (dump_file, "  May include autoinc/dec\n");
3474           fprintf (dump_file, "\n");
3475         }
3476
3477       address_cost_data_list[data_index] = data;
3478     }
3479
3480   bits = GET_MODE_BITSIZE (address_mode);
3481   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3482   offset &= mask;
3483   if ((offset >> (bits - 1) & 1))
3484     offset |= ~mask;
3485   s_offset = offset;
3486
3487   autoinc = false;
3488   autoinc_type = AINC_NONE;
3489   msize = GET_MODE_SIZE (mem_mode);
3490   autoinc_offset = offset;
3491   if (stmt_after_inc)
3492     autoinc_offset += ratio * cstep;
3493   if (symbol_present || var_present || ratio != 1)
3494     autoinc = false;
3495   else
3496     {
3497       if (has_postinc[mem_mode] && autoinc_offset == 0
3498           && msize == cstep)
3499         autoinc_type = AINC_POST_INC;
3500       else if (has_postdec[mem_mode] && autoinc_offset == 0
3501                && msize == -cstep)
3502         autoinc_type = AINC_POST_DEC;
3503       else if (has_preinc[mem_mode] && autoinc_offset == msize
3504                && msize == cstep)
3505         autoinc_type = AINC_PRE_INC;
3506       else if (has_predec[mem_mode] && autoinc_offset == -msize
3507                && msize == -cstep)
3508         autoinc_type = AINC_PRE_DEC;
3509
3510       if (autoinc_type != AINC_NONE)
3511         autoinc = true;
3512     }
3513
3514   cost = 0;
3515   offset_p = (s_offset != 0
3516               && data->min_offset <= s_offset
3517               && s_offset <= data->max_offset);
3518   ratio_p = (ratio != 1
3519              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3520
3521   if (ratio != 1 && !ratio_p)
3522     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3523
3524   if (s_offset && !offset_p && !symbol_present)
3525     cost += add_cost (speed, address_mode);
3526
3527   if (may_autoinc)
3528     *may_autoinc = autoinc;
3529   if (autoinc)
3530     acost = data->ainc_costs[autoinc_type];
3531   else
3532     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3533   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3534   return new_cost (cost + acost, complexity);
3535 }
3536
3537  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3538     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3539     calculating the operands of EXPR.  Returns true if successful, and returns
3540     the cost in COST.  */
3541
3542 static bool
3543 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3544                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3545 {
3546   comp_cost res;
3547   tree op1 = TREE_OPERAND (expr, 1);
3548   tree cst = TREE_OPERAND (mult, 1);
3549   tree multop = TREE_OPERAND (mult, 0);
3550   int m = exact_log2 (int_cst_value (cst));
3551   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3552   int sa_cost;
3553   bool equal_p = false;
3554
3555   if (!(m >= 0 && m < maxm))
3556     return false;
3557
3558   if (operand_equal_p (op1, mult, 0))
3559     equal_p = true;
3560
3561   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3562              ? shiftadd_cost (speed, mode, m)
3563              : (equal_p
3564                 ? shiftsub1_cost (speed, mode, m)
3565                 : shiftsub0_cost (speed, mode, m)));
3566   res = new_cost (sa_cost, 0);
3567   res = add_costs (res, equal_p ? cost0 : cost1);
3568
3569   STRIP_NOPS (multop);
3570   if (!is_gimple_val (multop))
3571     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3572
3573   *cost = res;
3574   return true;
3575 }
3576
3577 /* Estimates cost of forcing expression EXPR into a variable.  */
3578
3579 static comp_cost
3580 force_expr_to_var_cost (tree expr, bool speed)
3581 {
3582   static bool costs_initialized = false;
3583   static unsigned integer_cost [2];
3584   static unsigned symbol_cost [2];
3585   static unsigned address_cost [2];
3586   tree op0, op1;
3587   comp_cost cost0, cost1, cost;
3588   enum machine_mode mode;
3589
3590   if (!costs_initialized)
3591     {
3592       tree type = build_pointer_type (integer_type_node);
3593       tree var, addr;
3594       rtx x;
3595       int i;
3596
3597       var = create_tmp_var_raw (integer_type_node, "test_var");
3598       TREE_STATIC (var) = 1;
3599       x = produce_memory_decl_rtl (var, NULL);
3600       SET_DECL_RTL (var, x);
3601
3602       addr = build1 (ADDR_EXPR, type, var);
3603
3604
3605       for (i = 0; i < 2; i++)
3606         {
3607           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3608                                                              2000), i);
3609
3610           symbol_cost[i] = computation_cost (addr, i) + 1;
3611
3612           address_cost[i]
3613             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3614           if (dump_file && (dump_flags & TDF_DETAILS))
3615             {
3616               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3617               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3618               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3619               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3620               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3621               fprintf (dump_file, "\n");
3622             }
3623         }
3624
3625       costs_initialized = true;
3626     }
3627
3628   STRIP_NOPS (expr);
3629
3630   if (SSA_VAR_P (expr))
3631     return no_cost;
3632
3633   if (is_gimple_min_invariant (expr))
3634     {
3635       if (TREE_CODE (expr) == INTEGER_CST)
3636         return new_cost (integer_cost [speed], 0);
3637
3638       if (TREE_CODE (expr) == ADDR_EXPR)
3639         {
3640           tree obj = TREE_OPERAND (expr, 0);
3641
3642           if (TREE_CODE (obj) == VAR_DECL
3643               || TREE_CODE (obj) == PARM_DECL
3644               || TREE_CODE (obj) == RESULT_DECL)
3645             return new_cost (symbol_cost [speed], 0);
3646         }
3647
3648       return new_cost (address_cost [speed], 0);
3649     }
3650
3651   switch (TREE_CODE (expr))
3652     {
3653     case POINTER_PLUS_EXPR:
3654     case PLUS_EXPR:
3655     case MINUS_EXPR:
3656     case MULT_EXPR:
3657       op0 = TREE_OPERAND (expr, 0);
3658       op1 = TREE_OPERAND (expr, 1);
3659       STRIP_NOPS (op0);
3660       STRIP_NOPS (op1);
3661       break;
3662
3663     CASE_CONVERT:
3664     case NEGATE_EXPR:
3665       op0 = TREE_OPERAND (expr, 0);
3666       STRIP_NOPS (op0);
3667       op1 = NULL_TREE;
3668       break;
3669
3670     default:
3671       /* Just an arbitrary value, FIXME.  */
3672       return new_cost (target_spill_cost[speed], 0);
3673     }
3674
3675   if (op0 == NULL_TREE
3676       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3677     cost0 = no_cost;
3678   else
3679     cost0 = force_expr_to_var_cost (op0, speed);
3680
3681   if (op1 == NULL_TREE
3682       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3683     cost1 = no_cost;
3684   else
3685     cost1 = force_expr_to_var_cost (op1, speed);
3686
3687   mode = TYPE_MODE (TREE_TYPE (expr));
3688   switch (TREE_CODE (expr))
3689     {
3690     case POINTER_PLUS_EXPR:
3691     case PLUS_EXPR:
3692     case MINUS_EXPR:
3693     case NEGATE_EXPR:
3694       cost = new_cost (add_cost (speed, mode), 0);
3695       if (TREE_CODE (expr) != NEGATE_EXPR)
3696         {
3697           tree mult = NULL_TREE;
3698           comp_cost sa_cost;
3699           if (TREE_CODE (op1) == MULT_EXPR)
3700             mult = op1;
3701           else if (TREE_CODE (op0) == MULT_EXPR)
3702             mult = op0;
3703
3704           if (mult != NULL_TREE
3705               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3706               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3707                                     speed, &sa_cost))
3708             return sa_cost;
3709         }
3710       break;
3711
3712     CASE_CONVERT:
3713       {
3714         tree inner_mode, outer_mode;
3715         outer_mode = TREE_TYPE (expr);
3716         inner_mode = TREE_TYPE (op0);
3717         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3718                                        TYPE_MODE (inner_mode), speed), 0);
3719       }
3720       break;
3721
3722     case MULT_EXPR:
3723       if (cst_and_fits_in_hwi (op0))
3724         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3725                                              mode, speed), 0);
3726       else if (cst_and_fits_in_hwi (op1))
3727         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3728                                              mode, speed), 0);
3729       else
3730         return new_cost (target_spill_cost [speed], 0);
3731       break;
3732
3733     default:
3734       gcc_unreachable ();
3735     }
3736
3737   cost = add_costs (cost, cost0);
3738   cost = add_costs (cost, cost1);
3739
3740   /* Bound the cost by target_spill_cost.  The parts of complicated
3741      computations often are either loop invariant or at least can
3742      be shared between several iv uses, so letting this grow without
3743      limits would not give reasonable results.  */
3744   if (cost.cost > (int) target_spill_cost [speed])
3745     cost.cost = target_spill_cost [speed];
3746
3747   return cost;
3748 }
3749
3750 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3751    invariants the computation depends on.  */
3752
3753 static comp_cost
3754 force_var_cost (struct ivopts_data *data,
3755                 tree expr, bitmap *depends_on)
3756 {
3757   if (depends_on)
3758     {
3759       fd_ivopts_data = data;
3760       walk_tree (&expr, find_depends, depends_on, NULL);
3761     }
3762
3763   return force_expr_to_var_cost (expr, data->speed);
3764 }
3765
3766 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3767    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3768    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3769    invariants the computation depends on.  */
3770
3771 static comp_cost
3772 split_address_cost (struct ivopts_data *data,
3773                     tree addr, bool *symbol_present, bool *var_present,
3774                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 {
3776   tree core;
3777   HOST_WIDE_INT bitsize;
3778   HOST_WIDE_INT bitpos;
3779   tree toffset;
3780   enum machine_mode mode;
3781   int unsignedp, volatilep;
3782
3783   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3784                               &unsignedp, &volatilep);
3785
3786   if (toffset != 0
3787       || bitpos % BITS_PER_UNIT != 0
3788       || TREE_CODE (core) != VAR_DECL)
3789     {
3790       *symbol_present = false;
3791       *var_present = true;
3792       fd_ivopts_data = data;
3793       walk_tree (&addr, find_depends, depends_on, NULL);
3794       return new_cost (target_spill_cost[data->speed], 0);
3795     }
3796
3797   *offset += bitpos / BITS_PER_UNIT;
3798   if (TREE_STATIC (core)
3799       || DECL_EXTERNAL (core))
3800     {
3801       *symbol_present = true;
3802       *var_present = false;
3803       return no_cost;
3804     }
3805
3806   *symbol_present = false;
3807   *var_present = true;
3808   return no_cost;
3809 }
3810
3811 /* Estimates cost of expressing difference of addresses E1 - E2 as
3812    var + symbol + offset.  The value of offset is added to OFFSET,
3813    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3814    part is missing.  DEPENDS_ON is a set of the invariants the computation
3815    depends on.  */
3816
3817 static comp_cost
3818 ptr_difference_cost (struct ivopts_data *data,
3819                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3820                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3821 {
3822   HOST_WIDE_INT diff = 0;
3823   aff_tree aff_e1, aff_e2;
3824   tree type;
3825
3826   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3827
3828   if (ptr_difference_const (e1, e2, &diff))
3829     {
3830       *offset += diff;
3831       *symbol_present = false;
3832       *var_present = false;
3833       return no_cost;
3834     }
3835
3836   if (integer_zerop (e2))
3837     return split_address_cost (data, TREE_OPERAND (e1, 0),
3838                                symbol_present, var_present, offset, depends_on);
3839
3840   *symbol_present = false;
3841   *var_present = true;
3842
3843   type = signed_type_for (TREE_TYPE (e1));
3844   tree_to_aff_combination (e1, type, &aff_e1);
3845   tree_to_aff_combination (e2, type, &aff_e2);
3846   aff_combination_scale (&aff_e2, double_int_minus_one);
3847   aff_combination_add (&aff_e1, &aff_e2);
3848
3849   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3850 }
3851
3852 /* Estimates cost of expressing difference E1 - E2 as
3853    var + symbol + offset.  The value of offset is added to OFFSET,
3854    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3855    part is missing.  DEPENDS_ON is a set of the invariants the computation
3856    depends on.  */
3857
3858 static comp_cost
3859 difference_cost (struct ivopts_data *data,
3860                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3861                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3862 {
3863   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3864   unsigned HOST_WIDE_INT off1, off2;
3865   aff_tree aff_e1, aff_e2;
3866   tree type;
3867
3868   e1 = strip_offset (e1, &off1);
3869   e2 = strip_offset (e2, &off2);
3870   *offset += off1 - off2;
3871
3872   STRIP_NOPS (e1);
3873   STRIP_NOPS (e2);
3874
3875   if (TREE_CODE (e1) == ADDR_EXPR)
3876     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3877                                 offset, depends_on);
3878   *symbol_present = false;
3879
3880   if (operand_equal_p (e1, e2, 0))
3881     {
3882       *var_present = false;
3883       return no_cost;
3884     }
3885
3886   *var_present = true;
3887
3888   if (integer_zerop (e2))
3889     return force_var_cost (data, e1, depends_on);
3890
3891   if (integer_zerop (e1))
3892     {
3893       comp_cost cost = force_var_cost (data, e2, depends_on);
3894       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3895       return cost;
3896     }
3897
3898   type = signed_type_for (TREE_TYPE (e1));
3899   tree_to_aff_combination (e1, type, &aff_e1);
3900   tree_to_aff_combination (e2, type, &aff_e2);
3901   aff_combination_scale (&aff_e2, double_int_minus_one);
3902   aff_combination_add (&aff_e1, &aff_e2);
3903
3904   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3905 }
3906
3907 /* Returns true if AFF1 and AFF2 are identical.  */
3908
3909 static bool
3910 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3911 {
3912   unsigned i;
3913
3914   if (aff1->n != aff2->n)
3915     return false;
3916
3917   for (i = 0; i < aff1->n; i++)
3918     {
3919       if (aff1->elts[i].coef != aff2->elts[i].coef)
3920         return false;
3921
3922       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3923         return false;
3924     }
3925   return true;
3926 }
3927
3928 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3929
3930 static int
3931 get_expr_id (struct ivopts_data *data, tree expr)
3932 {
3933   struct iv_inv_expr_ent ent;
3934   struct iv_inv_expr_ent **slot;
3935
3936   ent.expr = expr;
3937   ent.hash = iterative_hash_expr (expr, 0);
3938   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3939   if (*slot)
3940     return (*slot)->id;
3941
3942   *slot = XNEW (struct iv_inv_expr_ent);
3943   (*slot)->expr = expr;
3944   (*slot)->hash = ent.hash;
3945   (*slot)->id = data->inv_expr_id++;
3946   return (*slot)->id;
3947 }
3948
3949 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3950    requires a new compiler generated temporary.  Returns -1 otherwise.
3951    ADDRESS_P is a flag indicating if the expression is for address
3952    computation.  */
3953
3954 static int
3955 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3956                             tree cbase, HOST_WIDE_INT ratio,
3957                             bool address_p)
3958 {
3959   aff_tree ubase_aff, cbase_aff;
3960   tree expr, ub, cb;
3961
3962   STRIP_NOPS (ubase);
3963   STRIP_NOPS (cbase);
3964   ub = ubase;
3965   cb = cbase;
3966
3967   if ((TREE_CODE (ubase) == INTEGER_CST)
3968       && (TREE_CODE (cbase) == INTEGER_CST))
3969     return -1;
3970
3971   /* Strips the constant part. */
3972   if (TREE_CODE (ubase) == PLUS_EXPR
3973       || TREE_CODE (ubase) == MINUS_EXPR
3974       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3975     {
3976       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3977         ubase = TREE_OPERAND (ubase, 0);
3978     }
3979
3980   /* Strips the constant part. */
3981   if (TREE_CODE (cbase) == PLUS_EXPR
3982       || TREE_CODE (cbase) == MINUS_EXPR
3983       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3984     {
3985       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3986         cbase = TREE_OPERAND (cbase, 0);
3987     }
3988
3989   if (address_p)
3990     {
3991       if (((TREE_CODE (ubase) == SSA_NAME)
3992            || (TREE_CODE (ubase) == ADDR_EXPR
3993                && is_gimple_min_invariant (ubase)))
3994           && (TREE_CODE (cbase) == INTEGER_CST))
3995         return -1;
3996
3997       if (((TREE_CODE (cbase) == SSA_NAME)
3998            || (TREE_CODE (cbase) == ADDR_EXPR
3999                && is_gimple_min_invariant (cbase)))
4000           && (TREE_CODE (ubase) == INTEGER_CST))
4001         return -1;
4002     }
4003
4004   if (ratio == 1)
4005     {
4006       if (operand_equal_p (ubase, cbase, 0))
4007         return -1;
4008
4009       if (TREE_CODE (ubase) == ADDR_EXPR
4010           && TREE_CODE (cbase) == ADDR_EXPR)
4011         {
4012           tree usym, csym;
4013
4014           usym = TREE_OPERAND (ubase, 0);
4015           csym = TREE_OPERAND (cbase, 0);
4016           if (TREE_CODE (usym) == ARRAY_REF)
4017             {
4018               tree ind = TREE_OPERAND (usym, 1);
4019               if (TREE_CODE (ind) == INTEGER_CST
4020                   && tree_fits_shwi_p (ind)
4021                   && tree_to_shwi (ind) == 0)
4022                 usym = TREE_OPERAND (usym, 0);
4023             }
4024           if (TREE_CODE (csym) == ARRAY_REF)
4025             {
4026               tree ind = TREE_OPERAND (csym, 1);
4027               if (TREE_CODE (ind) == INTEGER_CST
4028                   && tree_fits_shwi_p (ind)
4029                   && tree_to_shwi (ind) == 0)
4030                 csym = TREE_OPERAND (csym, 0);
4031             }
4032           if (operand_equal_p (usym, csym, 0))
4033             return -1;
4034         }
4035       /* Now do more complex comparison  */
4036       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4037       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4038       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4039         return -1;
4040     }
4041
4042   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4043   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4044
4045   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4046   aff_combination_add (&ubase_aff, &cbase_aff);
4047   expr = aff_combination_to_tree (&ubase_aff);
4048   return get_expr_id (data, expr);
4049 }
4050
4051
4052
4053 /* Determines the cost of the computation by that USE is expressed
4054    from induction variable CAND.  If ADDRESS_P is true, we just need
4055    to create an address from it, otherwise we want to get it into
4056    register.  A set of invariants we depend on is stored in
4057    DEPENDS_ON.  AT is the statement at that the value is computed.
4058    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4059    addressing is likely.  */
4060
4061 static comp_cost
4062 get_computation_cost_at (struct ivopts_data *data,
4063                          struct iv_use *use, struct iv_cand *cand,
4064                          bool address_p, bitmap *depends_on, gimple at,
4065                          bool *can_autoinc,
4066                          int *inv_expr_id)
4067 {
4068   tree ubase = use->iv->base, ustep = use->iv->step;
4069   tree cbase, cstep;
4070   tree utype = TREE_TYPE (ubase), ctype;
4071   unsigned HOST_WIDE_INT cstepi, offset = 0;
4072   HOST_WIDE_INT ratio, aratio;
4073   bool var_present, symbol_present, stmt_is_after_inc;
4074   comp_cost cost;
4075   double_int rat;
4076   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4077   enum machine_mode mem_mode = (address_p
4078                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4079                                 : VOIDmode);
4080
4081   *depends_on = NULL;
4082
4083   /* Only consider real candidates.  */
4084   if (!cand->iv)
4085     return infinite_cost;
4086
4087   cbase = cand->iv->base;
4088   cstep = cand->iv->step;
4089   ctype = TREE_TYPE (cbase);
4090
4091   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4092     {
4093       /* We do not have a precision to express the values of use.  */
4094       return infinite_cost;
4095     }
4096
4097   if (address_p
4098       || (use->iv->base_object
4099           && cand->iv->base_object
4100           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4101           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4102     {
4103       /* Do not try to express address of an object with computation based
4104          on address of a different object.  This may cause problems in rtl
4105          level alias analysis (that does not expect this to be happening,
4106          as this is illegal in C), and would be unlikely to be useful
4107          anyway.  */
4108       if (use->iv->base_object
4109           && cand->iv->base_object
4110           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4111         return infinite_cost;
4112     }
4113
4114   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4115     {
4116       /* TODO -- add direct handling of this case.  */
4117       goto fallback;
4118     }
4119
4120   /* CSTEPI is removed from the offset in case statement is after the
4121      increment.  If the step is not constant, we use zero instead.
4122      This is a bit imprecise (there is the extra addition), but
4123      redundancy elimination is likely to transform the code so that
4124      it uses value of the variable before increment anyway,
4125      so it is not that much unrealistic.  */
4126   if (cst_and_fits_in_hwi (cstep))
4127     cstepi = int_cst_value (cstep);
4128   else
4129     cstepi = 0;
4130
4131   if (!constant_multiple_of (ustep, cstep, &rat))
4132     return infinite_cost;
4133
4134   if (rat.fits_shwi ())
4135     ratio = rat.to_shwi ();
4136   else
4137     return infinite_cost;
4138
4139   STRIP_NOPS (cbase);
4140   ctype = TREE_TYPE (cbase);
4141
4142   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4143
4144   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4145      or ratio == 1, it is better to handle this like
4146
4147      ubase - ratio * cbase + ratio * var
4148
4149      (also holds in the case ratio == -1, TODO.  */
4150
4151   if (cst_and_fits_in_hwi (cbase))
4152     {
4153       offset = - ratio * int_cst_value (cbase);
4154       cost = difference_cost (data,
4155                               ubase, build_int_cst (utype, 0),
4156                               &symbol_present, &var_present, &offset,
4157                               depends_on);
4158       cost.cost /= avg_loop_niter (data->current_loop);
4159     }
4160   else if (ratio == 1)
4161     {
4162       tree real_cbase = cbase;
4163
4164       /* Check to see if any adjustment is needed.  */
4165       if (cstepi == 0 && stmt_is_after_inc)
4166         {
4167           aff_tree real_cbase_aff;
4168           aff_tree cstep_aff;
4169
4170           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4171                                    &real_cbase_aff);
4172           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4173
4174           aff_combination_add (&real_cbase_aff, &cstep_aff);
4175           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4176         }
4177
4178       cost = difference_cost (data,
4179                               ubase, real_cbase,
4180                               &symbol_present, &var_present, &offset,
4181                               depends_on);
4182       cost.cost /= avg_loop_niter (data->current_loop);
4183     }
4184   else if (address_p
4185            && !POINTER_TYPE_P (ctype)
4186            && multiplier_allowed_in_address_p
4187                 (ratio, mem_mode,
4188                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4189     {
4190       cbase
4191         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4192       cost = difference_cost (data,
4193                               ubase, cbase,
4194                               &symbol_present, &var_present, &offset,
4195                               depends_on);
4196       cost.cost /= avg_loop_niter (data->current_loop);
4197     }
4198   else
4199     {
4200       cost = force_var_cost (data, cbase, depends_on);
4201       cost = add_costs (cost,
4202                         difference_cost (data,
4203                                          ubase, build_int_cst (utype, 0),
4204                                          &symbol_present, &var_present,
4205                                          &offset, depends_on));
4206       cost.cost /= avg_loop_niter (data->current_loop);
4207       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4208     }
4209
4210   if (inv_expr_id)
4211     {
4212       *inv_expr_id =
4213           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4214       /* Clear depends on.  */
4215       if (*inv_expr_id != -1 && depends_on && *depends_on)
4216         bitmap_clear (*depends_on);
4217     }
4218
4219   /* If we are after the increment, the value of the candidate is higher by
4220      one iteration.  */
4221   if (stmt_is_after_inc)
4222     offset -= ratio * cstepi;
4223
4224   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4225      (symbol/var1/const parts may be omitted).  If we are looking for an
4226      address, find the cost of addressing this.  */
4227   if (address_p)
4228     return add_costs (cost,
4229                       get_address_cost (symbol_present, var_present,
4230                                         offset, ratio, cstepi,
4231                                         mem_mode,
4232                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4233                                         speed, stmt_is_after_inc,
4234                                         can_autoinc));
4235
4236   /* Otherwise estimate the costs for computing the expression.  */
4237   if (!symbol_present && !var_present && !offset)
4238     {
4239       if (ratio != 1)
4240         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4241       return cost;
4242     }
4243
4244   /* Symbol + offset should be compile-time computable so consider that they
4245       are added once to the variable, if present.  */
4246   if (var_present && (symbol_present || offset))
4247     cost.cost += adjust_setup_cost (data,
4248                                     add_cost (speed, TYPE_MODE (ctype)));
4249
4250   /* Having offset does not affect runtime cost in case it is added to
4251      symbol, but it increases complexity.  */
4252   if (offset)
4253     cost.complexity++;
4254
4255   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4256
4257   aratio = ratio > 0 ? ratio : -ratio;
4258   if (aratio != 1)
4259     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4260   return cost;
4261
4262 fallback:
4263   if (can_autoinc)
4264     *can_autoinc = false;
4265
4266   {
4267     /* Just get the expression, expand it and measure the cost.  */
4268     tree comp = get_computation_at (data->current_loop, use, cand, at);
4269
4270     if (!comp)
4271       return infinite_cost;
4272
4273     if (address_p)
4274       comp = build_simple_mem_ref (comp);
4275
4276     return new_cost (computation_cost (comp, speed), 0);
4277   }
4278 }
4279
4280 /* Determines the cost of the computation by that USE is expressed
4281    from induction variable CAND.  If ADDRESS_P is true, we just need
4282    to create an address from it, otherwise we want to get it into
4283    register.  A set of invariants we depend on is stored in
4284    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4285    autoinc addressing is likely.  */
4286
4287 static comp_cost
4288 get_computation_cost (struct ivopts_data *data,
4289                       struct iv_use *use, struct iv_cand *cand,
4290                       bool address_p, bitmap *depends_on,
4291                       bool *can_autoinc, int *inv_expr_id)
4292 {
4293   return get_computation_cost_at (data,
4294                                   use, cand, address_p, depends_on, use->stmt,
4295                                   can_autoinc, inv_expr_id);
4296 }
4297
4298 /* Determines cost of basing replacement of USE on CAND in a generic
4299    expression.  */
4300
4301 static bool
4302 determine_use_iv_cost_generic (struct ivopts_data *data,
4303                                struct iv_use *use, struct iv_cand *cand)
4304 {
4305   bitmap depends_on;
4306   comp_cost cost;
4307   int inv_expr_id = -1;
4308
4309   /* The simple case first -- if we need to express value of the preserved
4310      original biv, the cost is 0.  This also prevents us from counting the
4311      cost of increment twice -- once at this use and once in the cost of
4312      the candidate.  */
4313   if (cand->pos == IP_ORIGINAL
4314       && cand->incremented_at == use->stmt)
4315     {
4316       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4317                        ERROR_MARK, -1);
4318       return true;
4319     }
4320
4321   cost = get_computation_cost (data, use, cand, false, &depends_on,
4322                                NULL, &inv_expr_id);
4323
4324   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4325                    inv_expr_id);
4326
4327   return !infinite_cost_p (cost);
4328 }
4329
4330 /* Determines cost of basing replacement of USE on CAND in an address.  */
4331
4332 static bool
4333 determine_use_iv_cost_address (struct ivopts_data *data,
4334                                struct iv_use *use, struct iv_cand *cand)
4335 {
4336   bitmap depends_on;
4337   bool can_autoinc;
4338   int inv_expr_id = -1;
4339   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4340                                          &can_autoinc, &inv_expr_id);
4341
4342   if (cand->ainc_use == use)
4343     {
4344       if (can_autoinc)
4345         cost.cost -= cand->cost_step;
4346       /* If we generated the candidate solely for exploiting autoincrement
4347          opportunities, and it turns out it can't be used, set the cost to
4348          infinity to make sure we ignore it.  */
4349       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4350         cost = infinite_cost;
4351     }
4352   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4353                    inv_expr_id);
4354
4355   return !infinite_cost_p (cost);
4356 }
4357
4358 /* Computes value of candidate CAND at position AT in iteration NITER, and
4359    stores it to VAL.  */
4360
4361 static void
4362 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4363                aff_tree *val)
4364 {
4365   aff_tree step, delta, nit;
4366   struct iv *iv = cand->iv;
4367   tree type = TREE_TYPE (iv->base);
4368   tree steptype = type;
4369   if (POINTER_TYPE_P (type))
4370     steptype = sizetype;
4371
4372   tree_to_aff_combination (iv->step, steptype, &step);
4373   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4374   aff_combination_convert (&nit, steptype);
4375   aff_combination_mult (&nit, &step, &delta);
4376   if (stmt_after_increment (loop, cand, at))
4377     aff_combination_add (&delta, &step);
4378
4379   tree_to_aff_combination (iv->base, type, val);
4380   aff_combination_add (val, &delta);
4381 }
4382
4383 /* Returns period of induction variable iv.  */
4384
4385 static tree
4386 iv_period (struct iv *iv)
4387 {
4388   tree step = iv->step, period, type;
4389   tree pow2div;
4390
4391   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4392
4393   type = unsigned_type_for (TREE_TYPE (step));
4394   /* Period of the iv is lcm (step, type_range)/step -1,
4395      i.e., N*type_range/step - 1. Since type range is power
4396      of two, N == (step >> num_of_ending_zeros_binary (step),
4397      so the final result is
4398
4399        (type_range >> num_of_ending_zeros_binary (step)) - 1
4400
4401   */
4402   pow2div = num_ending_zeros (step);
4403
4404   period = build_low_bits_mask (type,
4405                                 (TYPE_PRECISION (type)
4406                                  - tree_to_uhwi (pow2div)));
4407
4408   return period;
4409 }
4410
4411 /* Returns the comparison operator used when eliminating the iv USE.  */
4412
4413 static enum tree_code
4414 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4415 {
4416   struct loop *loop = data->current_loop;
4417   basic_block ex_bb;
4418   edge exit;
4419
4420   ex_bb = gimple_bb (use->stmt);
4421   exit = EDGE_SUCC (ex_bb, 0);
4422   if (flow_bb_inside_loop_p (loop, exit->dest))
4423     exit = EDGE_SUCC (ex_bb, 1);
4424
4425   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4426 }
4427
4428 static tree
4429 strip_wrap_conserving_type_conversions (tree exp)
4430 {
4431   while (tree_ssa_useless_type_conversion (exp)
4432          && (nowrap_type_p (TREE_TYPE (exp))
4433              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4434     exp = TREE_OPERAND (exp, 0);
4435   return exp;
4436 }
4437
4438 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4439    check for an exact match.  */
4440
4441 static bool
4442 expr_equal_p (tree e, tree what)
4443 {
4444   gimple stmt;
4445   enum tree_code code;
4446
4447   e = strip_wrap_conserving_type_conversions (e);
4448   what = strip_wrap_conserving_type_conversions (what);
4449
4450   code = TREE_CODE (what);
4451   if (TREE_TYPE (e) != TREE_TYPE (what))
4452     return false;
4453
4454   if (operand_equal_p (e, what, 0))
4455     return true;
4456
4457   if (TREE_CODE (e) != SSA_NAME)
4458     return false;
4459
4460   stmt = SSA_NAME_DEF_STMT (e);
4461   if (gimple_code (stmt) != GIMPLE_ASSIGN
4462       || gimple_assign_rhs_code (stmt) != code)
4463     return false;
4464
4465   switch (get_gimple_rhs_class (code))
4466     {
4467     case GIMPLE_BINARY_RHS:
4468       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4469         return false;
4470       /* Fallthru.  */
4471
4472     case GIMPLE_UNARY_RHS:
4473     case GIMPLE_SINGLE_RHS:
4474       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4475     default:
4476       return false;
4477     }
4478 }
4479
4480 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4481    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4482    calculation is performed in non-wrapping type.
4483
4484    TODO: More generally, we could test for the situation that
4485          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4486          This would require knowing the sign of OFFSET.
4487
4488          Also, we only look for the first addition in the computation of BASE.
4489          More complex analysis would be better, but introducing it just for
4490          this optimization seems like an overkill.  */
4491
4492 static bool
4493 difference_cannot_overflow_p (tree base, tree offset)
4494 {
4495   enum tree_code code;
4496   tree e1, e2;
4497
4498   if (!nowrap_type_p (TREE_TYPE (base)))
4499     return false;
4500
4501   base = expand_simple_operations (base);
4502
4503   if (TREE_CODE (base) == SSA_NAME)
4504     {
4505       gimple stmt = SSA_NAME_DEF_STMT (base);
4506
4507       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4508         return false;
4509
4510       code = gimple_assign_rhs_code (stmt);
4511       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4512         return false;
4513
4514       e1 = gimple_assign_rhs1 (stmt);
4515       e2 = gimple_assign_rhs2 (stmt);
4516     }
4517   else
4518     {
4519       code = TREE_CODE (base);
4520       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4521         return false;
4522       e1 = TREE_OPERAND (base, 0);
4523       e2 = TREE_OPERAND (base, 1);
4524     }
4525
4526   /* TODO: deeper inspection may be necessary to prove the equality.  */
4527   switch (code)
4528     {
4529     case PLUS_EXPR:
4530       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4531     case POINTER_PLUS_EXPR:
4532       return expr_equal_p (e2, offset);
4533
4534     default:
4535       return false;
4536     }
4537 }
4538
4539 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4540    comparison with CAND.  NITER describes the number of iterations of
4541    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4542
4543    We aim to handle the following situation:
4544
4545    sometype *base, *p;
4546    int a, b, i;
4547
4548    i = a;
4549    p = p_0 = base + a;
4550
4551    do
4552      {
4553        bla (*p);
4554        p++;
4555        i++;
4556      }
4557    while (i < b);
4558
4559    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4560    We aim to optimize this to
4561
4562    p = p_0 = base + a;
4563    do
4564      {
4565        bla (*p);
4566        p++;
4567      }
4568    while (p < p_0 - a + b);
4569
4570    This preserves the correctness, since the pointer arithmetics does not
4571    overflow.  More precisely:
4572
4573    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4574       overflow in computing it or the values of p.
4575    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4576       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4577
4578 static bool
4579 iv_elimination_compare_lt (struct ivopts_data *data,
4580                            struct iv_cand *cand, enum tree_code *comp_p,
4581                            struct tree_niter_desc *niter)
4582 {
4583   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4584   struct affine_tree_combination nit, tmpa, tmpb;
4585   enum tree_code comp;
4586   HOST_WIDE_INT step;
4587
4588   /* We need to know that the candidate induction variable does not overflow.
4589      While more complex analysis may be used to prove this, for now just
4590      check that the variable appears in the original program and that it
4591      is computed in a type that guarantees no overflows.  */
4592   cand_type = TREE_TYPE (cand->iv->base);
4593   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4594     return false;
4595
4596   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4597      the calculation of the BOUND could overflow, making the comparison
4598      invalid.  */
4599   if (!data->loop_single_exit_p)
4600     return false;
4601
4602   /* We need to be able to decide whether candidate is increasing or decreasing
4603      in order to choose the right comparison operator.  */
4604   if (!cst_and_fits_in_hwi (cand->iv->step))
4605     return false;
4606   step = int_cst_value (cand->iv->step);
4607
4608   /* Check that the number of iterations matches the expected pattern:
4609      a + 1 > b ? 0 : b - a - 1.  */
4610   mbz = niter->may_be_zero;
4611   if (TREE_CODE (mbz) == GT_EXPR)
4612     {
4613       /* Handle a + 1 > b.  */
4614       tree op0 = TREE_OPERAND (mbz, 0);
4615       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4616         {
4617           a = TREE_OPERAND (op0, 0);
4618           b = TREE_OPERAND (mbz, 1);
4619         }
4620       else
4621         return false;
4622     }
4623   else if (TREE_CODE (mbz) == LT_EXPR)
4624     {
4625       tree op1 = TREE_OPERAND (mbz, 1);
4626
4627       /* Handle b < a + 1.  */
4628       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4629         {
4630           a = TREE_OPERAND (op1, 0);
4631           b = TREE_OPERAND (mbz, 0);
4632         }
4633       else
4634         return false;
4635     }
4636   else
4637     return false;
4638
4639   /* Expected number of iterations is B - A - 1.  Check that it matches
4640      the actual number, i.e., that B - A - NITER = 1.  */
4641   tree_to_aff_combination (niter->niter, nit_type, &nit);
4642   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4643   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4644   aff_combination_scale (&nit, double_int_minus_one);
4645   aff_combination_scale (&tmpa, double_int_minus_one);
4646   aff_combination_add (&tmpb, &tmpa);
4647   aff_combination_add (&tmpb, &nit);
4648   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4649     return false;
4650
4651   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4652      overflow.  */
4653   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4654                         cand->iv->step,
4655                         fold_convert (TREE_TYPE (cand->iv->step), a));
4656   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4657     return false;
4658
4659   /* Determine the new comparison operator.  */
4660   comp = step < 0 ? GT_EXPR : LT_EXPR;
4661   if (*comp_p == NE_EXPR)
4662     *comp_p = comp;
4663   else if (*comp_p == EQ_EXPR)
4664     *comp_p = invert_tree_comparison (comp, false);
4665   else
4666     gcc_unreachable ();
4667
4668   return true;
4669 }
4670
4671 /* Check whether it is possible to express the condition in USE by comparison
4672    of candidate CAND.  If so, store the value compared with to BOUND, and the
4673    comparison operator to COMP.  */
4674
4675 static bool
4676 may_eliminate_iv (struct ivopts_data *data,
4677                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4678                   enum tree_code *comp)
4679 {
4680   basic_block ex_bb;
4681   edge exit;
4682   tree period;
4683   struct loop *loop = data->current_loop;
4684   aff_tree bnd;
4685   struct tree_niter_desc *desc = NULL;
4686
4687   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4688     return false;
4689
4690   /* For now works only for exits that dominate the loop latch.
4691      TODO: extend to other conditions inside loop body.  */
4692   ex_bb = gimple_bb (use->stmt);
4693   if (use->stmt != last_stmt (ex_bb)
4694       || gimple_code (use->stmt) != GIMPLE_COND
4695       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4696     return false;
4697
4698   exit = EDGE_SUCC (ex_bb, 0);
4699   if (flow_bb_inside_loop_p (loop, exit->dest))
4700     exit = EDGE_SUCC (ex_bb, 1);
4701   if (flow_bb_inside_loop_p (loop, exit->dest))
4702     return false;
4703
4704   desc = niter_for_exit (data, exit);
4705   if (!desc)
4706     return false;
4707
4708   /* Determine whether we can use the variable to test the exit condition.
4709      This is the case iff the period of the induction variable is greater
4710      than the number of iterations for which the exit condition is true.  */
4711   period = iv_period (cand->iv);
4712
4713   /* If the number of iterations is constant, compare against it directly.  */
4714   if (TREE_CODE (desc->niter) == INTEGER_CST)
4715     {
4716       /* See cand_value_at.  */
4717       if (stmt_after_increment (loop, cand, use->stmt))
4718         {
4719           if (!tree_int_cst_lt (desc->niter, period))
4720             return false;
4721         }
4722       else
4723         {
4724           if (tree_int_cst_lt (period, desc->niter))
4725             return false;
4726         }
4727     }
4728
4729   /* If not, and if this is the only possible exit of the loop, see whether
4730      we can get a conservative estimate on the number of iterations of the
4731      entire loop and compare against that instead.  */
4732   else
4733     {
4734       double_int period_value, max_niter;
4735
4736       max_niter = desc->max;
4737       if (stmt_after_increment (loop, cand, use->stmt))
4738         max_niter += double_int_one;
4739       period_value = tree_to_double_int (period);
4740       if (max_niter.ugt (period_value))
4741         {
4742           /* See if we can take advantage of inferred loop bound information.  */
4743           if (data->loop_single_exit_p)
4744             {
4745               if (!max_loop_iterations (loop, &max_niter))
4746                 return false;
4747               /* The loop bound is already adjusted by adding 1.  */
4748               if (max_niter.ugt (period_value))
4749                 return false;
4750             }
4751           else
4752             return false;
4753         }
4754     }
4755
4756   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4757
4758   *bound = aff_combination_to_tree (&bnd);
4759   *comp = iv_elimination_compare (data, use);
4760
4761   /* It is unlikely that computing the number of iterations using division
4762      would be more profitable than keeping the original induction variable.  */
4763   if (expression_expensive_p (*bound))
4764     return false;
4765
4766   /* Sometimes, it is possible to handle the situation that the number of
4767      iterations may be zero unless additional assumtions by using <
4768      instead of != in the exit condition.
4769
4770      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4771            base the exit condition on it.  However, that is often too
4772            expensive.  */
4773   if (!integer_zerop (desc->may_be_zero))
4774     return iv_elimination_compare_lt (data, cand, comp, desc);
4775
4776   return true;
4777 }
4778
4779  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4780     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4781
4782 static int
4783 parm_decl_cost (struct ivopts_data *data, tree bound)
4784 {
4785   tree sbound = bound;
4786   STRIP_NOPS (sbound);
4787
4788   if (TREE_CODE (sbound) == SSA_NAME
4789       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4790       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4791       && data->body_includes_call)
4792     return COSTS_N_INSNS (1);
4793
4794   return 0;
4795 }
4796
4797 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4798
4799 static bool
4800 determine_use_iv_cost_condition (struct ivopts_data *data,
4801                                  struct iv_use *use, struct iv_cand *cand)
4802 {
4803   tree bound = NULL_TREE;
4804   struct iv *cmp_iv;
4805   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4806   comp_cost elim_cost, express_cost, cost, bound_cost;
4807   bool ok;
4808   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4809   tree *control_var, *bound_cst;
4810   enum tree_code comp = ERROR_MARK;
4811
4812   /* Only consider real candidates.  */
4813   if (!cand->iv)
4814     {
4815       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4816                        ERROR_MARK, -1);
4817       return false;
4818     }
4819
4820   /* Try iv elimination.  */
4821   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4822     {
4823       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4824       if (elim_cost.cost == 0)
4825         elim_cost.cost = parm_decl_cost (data, bound);
4826       else if (TREE_CODE (bound) == INTEGER_CST)
4827         elim_cost.cost = 0;
4828       /* If we replace a loop condition 'i < n' with 'p < base + n',
4829          depends_on_elim will have 'base' and 'n' set, which implies
4830          that both 'base' and 'n' will be live during the loop.  More likely,
4831          'base + n' will be loop invariant, resulting in only one live value
4832          during the loop.  So in that case we clear depends_on_elim and set
4833         elim_inv_expr_id instead.  */
4834       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4835         {
4836           elim_inv_expr_id = get_expr_id (data, bound);
4837           bitmap_clear (depends_on_elim);
4838         }
4839       /* The bound is a loop invariant, so it will be only computed
4840          once.  */
4841       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4842     }
4843   else
4844     elim_cost = infinite_cost;
4845
4846   /* Try expressing the original giv.  If it is compared with an invariant,
4847      note that we cannot get rid of it.  */
4848   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4849                               NULL, &cmp_iv);
4850   gcc_assert (ok);
4851
4852   /* When the condition is a comparison of the candidate IV against
4853      zero, prefer this IV.
4854
4855      TODO: The constant that we're subtracting from the cost should
4856      be target-dependent.  This information should be added to the
4857      target costs for each backend.  */
4858   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4859       && integer_zerop (*bound_cst)
4860       && (operand_equal_p (*control_var, cand->var_after, 0)
4861           || operand_equal_p (*control_var, cand->var_before, 0)))
4862     elim_cost.cost -= 1;
4863
4864   express_cost = get_computation_cost (data, use, cand, false,
4865                                        &depends_on_express, NULL,
4866                                        &express_inv_expr_id);
4867   fd_ivopts_data = data;
4868   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4869
4870   /* Count the cost of the original bound as well.  */
4871   bound_cost = force_var_cost (data, *bound_cst, NULL);
4872   if (bound_cost.cost == 0)
4873     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4874   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4875     bound_cost.cost = 0;
4876   express_cost.cost += bound_cost.cost;
4877
4878   /* Choose the better approach, preferring the eliminated IV. */
4879   if (compare_costs (elim_cost, express_cost) <= 0)
4880     {
4881       cost = elim_cost;
4882       depends_on = depends_on_elim;
4883       depends_on_elim = NULL;
4884       inv_expr_id = elim_inv_expr_id;
4885     }
4886   else
4887     {
4888       cost = express_cost;
4889       depends_on = depends_on_express;
4890       depends_on_express = NULL;
4891       bound = NULL_TREE;
4892       comp = ERROR_MARK;
4893       inv_expr_id = express_inv_expr_id;
4894     }
4895
4896   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4897
4898   if (depends_on_elim)
4899     BITMAP_FREE (depends_on_elim);
4900   if (depends_on_express)
4901     BITMAP_FREE (depends_on_express);
4902
4903   return !infinite_cost_p (cost);
4904 }
4905
4906 /* Determines cost of basing replacement of USE on CAND.  Returns false
4907    if USE cannot be based on CAND.  */
4908
4909 static bool
4910 determine_use_iv_cost (struct ivopts_data *data,
4911                        struct iv_use *use, struct iv_cand *cand)
4912 {
4913   switch (use->type)
4914     {
4915     case USE_NONLINEAR_EXPR:
4916       return determine_use_iv_cost_generic (data, use, cand);
4917
4918     case USE_ADDRESS:
4919       return determine_use_iv_cost_address (data, use, cand);
4920
4921     case USE_COMPARE:
4922       return determine_use_iv_cost_condition (data, use, cand);
4923
4924     default:
4925       gcc_unreachable ();
4926     }
4927 }
4928
4929 /* Return true if get_computation_cost indicates that autoincrement is
4930    a possibility for the pair of USE and CAND, false otherwise.  */
4931
4932 static bool
4933 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4934                            struct iv_cand *cand)
4935 {
4936   bitmap depends_on;
4937   bool can_autoinc;
4938   comp_cost cost;
4939
4940   if (use->type != USE_ADDRESS)
4941     return false;
4942
4943   cost = get_computation_cost (data, use, cand, true, &depends_on,
4944                                &can_autoinc, NULL);
4945
4946   BITMAP_FREE (depends_on);
4947
4948   return !infinite_cost_p (cost) && can_autoinc;
4949 }
4950
4951 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4952    use that allows autoincrement, and set their AINC_USE if possible.  */
4953
4954 static void
4955 set_autoinc_for_original_candidates (struct ivopts_data *data)
4956 {
4957   unsigned i, j;
4958
4959   for (i = 0; i < n_iv_cands (data); i++)
4960     {
4961       struct iv_cand *cand = iv_cand (data, i);
4962       struct iv_use *closest_before = NULL;
4963       struct iv_use *closest_after = NULL;
4964       if (cand->pos != IP_ORIGINAL)
4965         continue;
4966
4967       for (j = 0; j < n_iv_uses (data); j++)
4968         {
4969           struct iv_use *use = iv_use (data, j);
4970           unsigned uid = gimple_uid (use->stmt);
4971
4972           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4973             continue;
4974
4975           if (uid < gimple_uid (cand->incremented_at)
4976               && (closest_before == NULL
4977                   || uid > gimple_uid (closest_before->stmt)))
4978             closest_before = use;
4979
4980           if (uid > gimple_uid (cand->incremented_at)
4981               && (closest_after == NULL
4982                   || uid < gimple_uid (closest_after->stmt)))
4983             closest_after = use;
4984         }
4985
4986       if (closest_before != NULL
4987           && autoinc_possible_for_pair (data, closest_before, cand))
4988         cand->ainc_use = closest_before;
4989       else if (closest_after != NULL
4990                && autoinc_possible_for_pair (data, closest_after, cand))
4991         cand->ainc_use = closest_after;
4992     }
4993 }
4994
4995 /* Finds the candidates for the induction variables.  */
4996
4997 static void
4998 find_iv_candidates (struct ivopts_data *data)
4999 {
5000   /* Add commonly used ivs.  */
5001   add_standard_iv_candidates (data);
5002
5003   /* Add old induction variables.  */
5004   add_old_ivs_candidates (data);
5005
5006   /* Add induction variables derived from uses.  */
5007   add_derived_ivs_candidates (data);
5008
5009   set_autoinc_for_original_candidates (data);
5010
5011   /* Record the important candidates.  */
5012   record_important_candidates (data);
5013 }
5014
5015 /* Determines costs of basing the use of the iv on an iv candidate.  */
5016
5017 static void
5018 determine_use_iv_costs (struct ivopts_data *data)
5019 {
5020   unsigned i, j;
5021   struct iv_use *use;
5022   struct iv_cand *cand;
5023   bitmap to_clear = BITMAP_ALLOC (NULL);
5024
5025   alloc_use_cost_map (data);
5026
5027   for (i = 0; i < n_iv_uses (data); i++)
5028     {
5029       use = iv_use (data, i);
5030
5031       if (data->consider_all_candidates)
5032         {
5033           for (j = 0; j < n_iv_cands (data); j++)
5034             {
5035               cand = iv_cand (data, j);
5036               determine_use_iv_cost (data, use, cand);
5037             }
5038         }
5039       else
5040         {
5041           bitmap_iterator bi;
5042
5043           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5044             {
5045               cand = iv_cand (data, j);
5046               if (!determine_use_iv_cost (data, use, cand))
5047                 bitmap_set_bit (to_clear, j);
5048             }
5049
5050           /* Remove the candidates for that the cost is infinite from
5051              the list of related candidates.  */
5052           bitmap_and_compl_into (use->related_cands, to_clear);
5053           bitmap_clear (to_clear);
5054         }
5055     }
5056
5057   BITMAP_FREE (to_clear);
5058
5059   if (dump_file && (dump_flags & TDF_DETAILS))
5060     {
5061       fprintf (dump_file, "Use-candidate costs:\n");
5062
5063       for (i = 0; i < n_iv_uses (data); i++)
5064         {
5065           use = iv_use (data, i);
5066
5067           fprintf (dump_file, "Use %d:\n", i);
5068           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5069           for (j = 0; j < use->n_map_members; j++)
5070             {
5071               if (!use->cost_map[j].cand
5072                   || infinite_cost_p (use->cost_map[j].cost))
5073                 continue;
5074
5075               fprintf (dump_file, "  %d\t%d\t%d\t",
5076                        use->cost_map[j].cand->id,
5077                        use->cost_map[j].cost.cost,
5078                        use->cost_map[j].cost.complexity);
5079               if (use->cost_map[j].depends_on)
5080                 bitmap_print (dump_file,
5081                               use->cost_map[j].depends_on, "","");
5082               if (use->cost_map[j].inv_expr_id != -1)
5083                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5084               fprintf (dump_file, "\n");
5085             }
5086
5087           fprintf (dump_file, "\n");
5088         }
5089       fprintf (dump_file, "\n");
5090     }
5091 }
5092
5093 /* Determines cost of the candidate CAND.  */
5094
5095 static void
5096 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5097 {
5098   comp_cost cost_base;
5099   unsigned cost, cost_step;
5100   tree base;
5101
5102   if (!cand->iv)
5103     {
5104       cand->cost = 0;
5105       return;
5106     }
5107
5108   /* There are two costs associated with the candidate -- its increment
5109      and its initialization.  The second is almost negligible for any loop
5110      that rolls enough, so we take it just very little into account.  */
5111
5112   base = cand->iv->base;
5113   cost_base = force_var_cost (data, base, NULL);
5114   /* It will be exceptional that the iv register happens to be initialized with
5115      the proper value at no cost.  In general, there will at least be a regcopy
5116      or a const set.  */
5117   if (cost_base.cost == 0)
5118     cost_base.cost = COSTS_N_INSNS (1);
5119   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5120
5121   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5122
5123   /* Prefer the original ivs unless we may gain something by replacing it.
5124      The reason is to make debugging simpler; so this is not relevant for
5125      artificial ivs created by other optimization passes.  */
5126   if (cand->pos != IP_ORIGINAL
5127       || !SSA_NAME_VAR (cand->var_before)
5128       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5129     cost++;
5130
5131   /* Prefer not to insert statements into latch unless there are some
5132      already (so that we do not create unnecessary jumps).  */
5133   if (cand->pos == IP_END
5134       && empty_block_p (ip_end_pos (data->current_loop)))
5135     cost++;
5136
5137   cand->cost = cost;
5138   cand->cost_step = cost_step;
5139 }
5140
5141 /* Determines costs of computation of the candidates.  */
5142
5143 static void
5144 determine_iv_costs (struct ivopts_data *data)
5145 {
5146   unsigned i;
5147
5148   if (dump_file && (dump_flags & TDF_DETAILS))
5149     {
5150       fprintf (dump_file, "Candidate costs:\n");
5151       fprintf (dump_file, "  cand\tcost\n");
5152     }
5153
5154   for (i = 0; i < n_iv_cands (data); i++)
5155     {
5156       struct iv_cand *cand = iv_cand (data, i);
5157
5158       determine_iv_cost (data, cand);
5159
5160       if (dump_file && (dump_flags & TDF_DETAILS))
5161         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5162     }
5163
5164   if (dump_file && (dump_flags & TDF_DETAILS))
5165     fprintf (dump_file, "\n");
5166 }
5167
5168 /* Calculates cost for having SIZE induction variables.  */
5169
5170 static unsigned
5171 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5172 {
5173   /* We add size to the cost, so that we prefer eliminating ivs
5174      if possible.  */
5175   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5176                                             data->body_includes_call);
5177 }
5178
5179 /* For each size of the induction variable set determine the penalty.  */
5180
5181 static void
5182 determine_set_costs (struct ivopts_data *data)
5183 {
5184   unsigned j, n;
5185   gimple phi;
5186   gimple_stmt_iterator psi;
5187   tree op;
5188   struct loop *loop = data->current_loop;
5189   bitmap_iterator bi;
5190
5191   if (dump_file && (dump_flags & TDF_DETAILS))
5192     {
5193       fprintf (dump_file, "Global costs:\n");
5194       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5195       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5196       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5197       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5198     }
5199
5200   n = 0;
5201   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5202     {
5203       phi = gsi_stmt (psi);
5204       op = PHI_RESULT (phi);
5205
5206       if (virtual_operand_p (op))
5207         continue;
5208
5209       if (get_iv (data, op))
5210         continue;
5211
5212       n++;
5213     }
5214
5215   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5216     {
5217       struct version_info *info = ver_info (data, j);
5218
5219       if (info->inv_id && info->has_nonlin_use)
5220         n++;
5221     }
5222
5223   data->regs_used = n;
5224   if (dump_file && (dump_flags & TDF_DETAILS))
5225     fprintf (dump_file, "  regs_used %d\n", n);
5226
5227   if (dump_file && (dump_flags & TDF_DETAILS))
5228     {
5229       fprintf (dump_file, "  cost for size:\n");
5230       fprintf (dump_file, "  ivs\tcost\n");
5231       for (j = 0; j <= 2 * target_avail_regs; j++)
5232         fprintf (dump_file, "  %d\t%d\n", j,
5233                  ivopts_global_cost_for_size (data, j));
5234       fprintf (dump_file, "\n");
5235     }
5236 }
5237
5238 /* Returns true if A is a cheaper cost pair than B.  */
5239
5240 static bool
5241 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5242 {
5243   int cmp;
5244
5245   if (!a)
5246     return false;
5247
5248   if (!b)
5249     return true;
5250
5251   cmp = compare_costs (a->cost, b->cost);
5252   if (cmp < 0)
5253     return true;
5254
5255   if (cmp > 0)
5256     return false;
5257
5258   /* In case the costs are the same, prefer the cheaper candidate.  */
5259   if (a->cand->cost < b->cand->cost)
5260     return true;
5261
5262   return false;
5263 }
5264
5265
5266 /* Returns candidate by that USE is expressed in IVS.  */
5267
5268 static struct cost_pair *
5269 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5270 {
5271   return ivs->cand_for_use[use->id];
5272 }
5273
5274 /* Computes the cost field of IVS structure.  */
5275
5276 static void
5277 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5278 {
5279   comp_cost cost = ivs->cand_use_cost;
5280
5281   cost.cost += ivs->cand_cost;
5282
5283   cost.cost += ivopts_global_cost_for_size (data,
5284                                             ivs->n_regs + ivs->num_used_inv_expr);
5285
5286   ivs->cost = cost;
5287 }
5288
5289 /* Remove invariants in set INVS to set IVS.  */
5290
5291 static void
5292 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5293 {
5294   bitmap_iterator bi;
5295   unsigned iid;
5296
5297   if (!invs)
5298     return;
5299
5300   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5301     {
5302       ivs->n_invariant_uses[iid]--;
5303       if (ivs->n_invariant_uses[iid] == 0)
5304         ivs->n_regs--;
5305     }
5306 }
5307
5308 /* Set USE not to be expressed by any candidate in IVS.  */
5309
5310 static void
5311 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5312                  struct iv_use *use)
5313 {
5314   unsigned uid = use->id, cid;
5315   struct cost_pair *cp;
5316
5317   cp = ivs->cand_for_use[uid];
5318   if (!cp)
5319     return;
5320   cid = cp->cand->id;
5321
5322   ivs->bad_uses++;
5323   ivs->cand_for_use[uid] = NULL;
5324   ivs->n_cand_uses[cid]--;
5325
5326   if (ivs->n_cand_uses[cid] == 0)
5327     {
5328       bitmap_clear_bit (ivs->cands, cid);
5329       /* Do not count the pseudocandidates.  */
5330       if (cp->cand->iv)
5331         ivs->n_regs--;
5332       ivs->n_cands--;
5333       ivs->cand_cost -= cp->cand->cost;
5334
5335       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5336     }
5337
5338   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5339
5340   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5341
5342   if (cp->inv_expr_id != -1)
5343     {
5344       ivs->used_inv_expr[cp->inv_expr_id]--;
5345       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5346         ivs->num_used_inv_expr--;
5347     }
5348   iv_ca_recount_cost (data, ivs);
5349 }
5350
5351 /* Add invariants in set INVS to set IVS.  */
5352
5353 static void
5354 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5355 {
5356   bitmap_iterator bi;
5357   unsigned iid;
5358
5359   if (!invs)
5360     return;
5361
5362   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5363     {
5364       ivs->n_invariant_uses[iid]++;
5365       if (ivs->n_invariant_uses[iid] == 1)
5366         ivs->n_regs++;
5367     }
5368 }
5369
5370 /* Set cost pair for USE in set IVS to CP.  */
5371
5372 static void
5373 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5374               struct iv_use *use, struct cost_pair *cp)
5375 {
5376   unsigned uid = use->id, cid;
5377
5378   if (ivs->cand_for_use[uid] == cp)
5379     return;
5380
5381   if (ivs->cand_for_use[uid])
5382     iv_ca_set_no_cp (data, ivs, use);
5383
5384   if (cp)
5385     {
5386       cid = cp->cand->id;
5387
5388       ivs->bad_uses--;
5389       ivs->cand_for_use[uid] = cp;
5390       ivs->n_cand_uses[cid]++;
5391       if (ivs->n_cand_uses[cid] == 1)
5392         {
5393           bitmap_set_bit (ivs->cands, cid);
5394           /* Do not count the pseudocandidates.  */
5395           if (cp->cand->iv)
5396             ivs->n_regs++;
5397           ivs->n_cands++;
5398           ivs->cand_cost += cp->cand->cost;
5399
5400           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5401         }
5402
5403       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5404       iv_ca_set_add_invariants (ivs, cp->depends_on);
5405
5406       if (cp->inv_expr_id != -1)
5407         {
5408           ivs->used_inv_expr[cp->inv_expr_id]++;
5409           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5410             ivs->num_used_inv_expr++;
5411         }
5412       iv_ca_recount_cost (data, ivs);
5413     }
5414 }
5415
5416 /* Extend set IVS by expressing USE by some of the candidates in it
5417    if possible. All important candidates will be considered
5418    if IMPORTANT_CANDIDATES is true.  */
5419
5420 static void
5421 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5422                struct iv_use *use, bool important_candidates)
5423 {
5424   struct cost_pair *best_cp = NULL, *cp;
5425   bitmap_iterator bi;
5426   bitmap cands;
5427   unsigned i;
5428
5429   gcc_assert (ivs->upto >= use->id);
5430
5431   if (ivs->upto == use->id)
5432     {
5433       ivs->upto++;
5434       ivs->bad_uses++;
5435     }
5436
5437   cands = (important_candidates ? data->important_candidates : ivs->cands);
5438   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5439     {
5440       struct iv_cand *cand = iv_cand (data, i);
5441
5442       cp = get_use_iv_cost (data, use, cand);
5443
5444       if (cheaper_cost_pair (cp, best_cp))
5445         best_cp = cp;
5446     }
5447
5448   iv_ca_set_cp (data, ivs, use, best_cp);
5449 }
5450
5451 /* Get cost for assignment IVS.  */
5452
5453 static comp_cost
5454 iv_ca_cost (struct iv_ca *ivs)
5455 {
5456   /* This was a conditional expression but it triggered a bug in
5457      Sun C 5.5.  */
5458   if (ivs->bad_uses)
5459     return infinite_cost;
5460   else
5461     return ivs->cost;
5462 }
5463
5464 /* Returns true if all dependences of CP are among invariants in IVS.  */
5465
5466 static bool
5467 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5468 {
5469   unsigned i;
5470   bitmap_iterator bi;
5471
5472   if (!cp->depends_on)
5473     return true;
5474
5475   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5476     {
5477       if (ivs->n_invariant_uses[i] == 0)
5478         return false;
5479     }
5480
5481   return true;
5482 }
5483
5484 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5485    it before NEXT_CHANGE.  */
5486
5487 static struct iv_ca_delta *
5488 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5489                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5490 {
5491   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5492
5493   change->use = use;
5494   change->old_cp = old_cp;
5495   change->new_cp = new_cp;
5496   change->next_change = next_change;
5497
5498   return change;
5499 }
5500
5501 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5502    are rewritten.  */
5503
5504 static struct iv_ca_delta *
5505 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5506 {
5507   struct iv_ca_delta *last;
5508
5509   if (!l2)
5510     return l1;
5511
5512   if (!l1)
5513     return l2;
5514
5515   for (last = l1; last->next_change; last = last->next_change)
5516     continue;
5517   last->next_change = l2;
5518
5519   return l1;
5520 }
5521
5522 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5523
5524 static struct iv_ca_delta *
5525 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5526 {
5527   struct iv_ca_delta *act, *next, *prev = NULL;
5528   struct cost_pair *tmp;
5529
5530   for (act = delta; act; act = next)
5531     {
5532       next = act->next_change;
5533       act->next_change = prev;
5534       prev = act;
5535
5536       tmp = act->old_cp;
5537       act->old_cp = act->new_cp;
5538       act->new_cp = tmp;
5539     }
5540
5541   return prev;
5542 }
5543
5544 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5545    reverted instead.  */
5546
5547 static void
5548 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5549                     struct iv_ca_delta *delta, bool forward)
5550 {
5551   struct cost_pair *from, *to;
5552   struct iv_ca_delta *act;
5553
5554   if (!forward)
5555     delta = iv_ca_delta_reverse (delta);
5556
5557   for (act = delta; act; act = act->next_change)
5558     {
5559       from = act->old_cp;
5560       to = act->new_cp;
5561       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5562       iv_ca_set_cp (data, ivs, act->use, to);
5563     }
5564
5565   if (!forward)
5566     iv_ca_delta_reverse (delta);
5567 }
5568
5569 /* Returns true if CAND is used in IVS.  */
5570
5571 static bool
5572 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5573 {
5574   return ivs->n_cand_uses[cand->id] > 0;
5575 }
5576
5577 /* Returns number of induction variable candidates in the set IVS.  */
5578
5579 static unsigned
5580 iv_ca_n_cands (struct iv_ca *ivs)
5581 {
5582   return ivs->n_cands;
5583 }
5584
5585 /* Free the list of changes DELTA.  */
5586
5587 static void
5588 iv_ca_delta_free (struct iv_ca_delta **delta)
5589 {
5590   struct iv_ca_delta *act, *next;
5591
5592   for (act = *delta; act; act = next)
5593     {
5594       next = act->next_change;
5595       free (act);
5596     }
5597
5598   *delta = NULL;
5599 }
5600
5601 /* Allocates new iv candidates assignment.  */
5602
5603 static struct iv_ca *
5604 iv_ca_new (struct ivopts_data *data)
5605 {
5606   struct iv_ca *nw = XNEW (struct iv_ca);
5607
5608   nw->upto = 0;
5609   nw->bad_uses = 0;
5610   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5611   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5612   nw->cands = BITMAP_ALLOC (NULL);
5613   nw->n_cands = 0;
5614   nw->n_regs = 0;
5615   nw->cand_use_cost = no_cost;
5616   nw->cand_cost = 0;
5617   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5618   nw->cost = no_cost;
5619   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5620   nw->num_used_inv_expr = 0;
5621
5622   return nw;
5623 }
5624
5625 /* Free memory occupied by the set IVS.  */
5626
5627 static void
5628 iv_ca_free (struct iv_ca **ivs)
5629 {
5630   free ((*ivs)->cand_for_use);
5631   free ((*ivs)->n_cand_uses);
5632   BITMAP_FREE ((*ivs)->cands);
5633   free ((*ivs)->n_invariant_uses);
5634   free ((*ivs)->used_inv_expr);
5635   free (*ivs);
5636   *ivs = NULL;
5637 }
5638
5639 /* Dumps IVS to FILE.  */
5640
5641 static void
5642 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5643 {
5644   const char *pref = "  invariants ";
5645   unsigned i;
5646   comp_cost cost = iv_ca_cost (ivs);
5647
5648   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5649   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5650            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5651   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5652
5653    for (i = 0; i < ivs->upto; i++)
5654     {
5655       struct iv_use *use = iv_use (data, i);
5656       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5657       if (cp)
5658         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5659                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5660       else
5661         fprintf (file, "   use:%d --> ??\n", use->id);
5662     }
5663
5664   for (i = 1; i <= data->max_inv_id; i++)
5665     if (ivs->n_invariant_uses[i])
5666       {
5667         fprintf (file, "%s%d", pref, i);
5668         pref = ", ";
5669       }
5670   fprintf (file, "\n\n");
5671 }
5672
5673 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5674    new set, and store differences in DELTA.  Number of induction variables
5675    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5676    the function will try to find a solution with mimimal iv candidates.  */
5677
5678 static comp_cost
5679 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5680               struct iv_cand *cand, struct iv_ca_delta **delta,
5681               unsigned *n_ivs, bool min_ncand)
5682 {
5683   unsigned i;
5684   comp_cost cost;
5685   struct iv_use *use;
5686   struct cost_pair *old_cp, *new_cp;
5687
5688   *delta = NULL;
5689   for (i = 0; i < ivs->upto; i++)
5690     {
5691       use = iv_use (data, i);
5692       old_cp = iv_ca_cand_for_use (ivs, use);
5693
5694       if (old_cp
5695           && old_cp->cand == cand)
5696         continue;
5697
5698       new_cp = get_use_iv_cost (data, use, cand);
5699       if (!new_cp)
5700         continue;
5701
5702       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5703         continue;
5704
5705       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5706         continue;
5707
5708       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5709     }
5710
5711   iv_ca_delta_commit (data, ivs, *delta, true);
5712   cost = iv_ca_cost (ivs);
5713   if (n_ivs)
5714     *n_ivs = iv_ca_n_cands (ivs);
5715   iv_ca_delta_commit (data, ivs, *delta, false);
5716
5717   return cost;
5718 }
5719
5720 /* Try narrowing set IVS by removing CAND.  Return the cost of
5721    the new set and store the differences in DELTA.  */
5722
5723 static comp_cost
5724 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5725               struct iv_cand *cand, struct iv_ca_delta **delta)
5726 {
5727   unsigned i, ci;
5728   struct iv_use *use;
5729   struct cost_pair *old_cp, *new_cp, *cp;
5730   bitmap_iterator bi;
5731   struct iv_cand *cnd;
5732   comp_cost cost;
5733
5734   *delta = NULL;
5735   for (i = 0; i < n_iv_uses (data); i++)
5736     {
5737       use = iv_use (data, i);
5738
5739       old_cp = iv_ca_cand_for_use (ivs, use);
5740       if (old_cp->cand != cand)
5741         continue;
5742
5743       new_cp = NULL;
5744
5745       if (data->consider_all_candidates)
5746         {
5747           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5748             {
5749               if (ci == cand->id)
5750                 continue;
5751
5752               cnd = iv_cand (data, ci);
5753
5754               cp = get_use_iv_cost (data, use, cnd);
5755               if (!cp)
5756                 continue;
5757
5758               if (!iv_ca_has_deps (ivs, cp))
5759                 continue;
5760
5761               if (!cheaper_cost_pair (cp, new_cp))
5762                 continue;
5763
5764               new_cp = cp;
5765             }
5766         }
5767       else
5768         {
5769           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5770             {
5771               if (ci == cand->id)
5772                 continue;
5773
5774               cnd = iv_cand (data, ci);
5775
5776               cp = get_use_iv_cost (data, use, cnd);
5777               if (!cp)
5778                 continue;
5779               if (!iv_ca_has_deps (ivs, cp))
5780                 continue;
5781
5782               if (!cheaper_cost_pair (cp, new_cp))
5783                 continue;
5784
5785               new_cp = cp;
5786             }
5787         }
5788
5789       if (!new_cp)
5790         {
5791           iv_ca_delta_free (delta);
5792           return infinite_cost;
5793         }
5794
5795       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5796     }
5797
5798   iv_ca_delta_commit (data, ivs, *delta, true);
5799   cost = iv_ca_cost (ivs);
5800   iv_ca_delta_commit (data, ivs, *delta, false);
5801
5802   return cost;
5803 }
5804
5805 /* Try optimizing the set of candidates IVS by removing candidates different
5806    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5807    differences in DELTA.  */
5808
5809 static comp_cost
5810 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5811              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5812 {
5813   bitmap_iterator bi;
5814   struct iv_ca_delta *act_delta, *best_delta;
5815   unsigned i;
5816   comp_cost best_cost, acost;
5817   struct iv_cand *cand;
5818
5819   best_delta = NULL;
5820   best_cost = iv_ca_cost (ivs);
5821
5822   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5823     {
5824       cand = iv_cand (data, i);
5825
5826       if (cand == except_cand)
5827         continue;
5828
5829       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5830
5831       if (compare_costs (acost, best_cost) < 0)
5832         {
5833           best_cost = acost;
5834           iv_ca_delta_free (&best_delta);
5835           best_delta = act_delta;
5836         }
5837       else
5838         iv_ca_delta_free (&act_delta);
5839     }
5840
5841   if (!best_delta)
5842     {
5843       *delta = NULL;
5844       return best_cost;
5845     }
5846
5847   /* Recurse to possibly remove other unnecessary ivs.  */
5848   iv_ca_delta_commit (data, ivs, best_delta, true);
5849   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5850   iv_ca_delta_commit (data, ivs, best_delta, false);
5851   *delta = iv_ca_delta_join (best_delta, *delta);
5852   return best_cost;
5853 }
5854
5855 /* Tries to extend the sets IVS in the best possible way in order
5856    to express the USE.  If ORIGINALP is true, prefer candidates from
5857    the original set of IVs, otherwise favor important candidates not
5858    based on any memory object.  */
5859
5860 static bool
5861 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5862                   struct iv_use *use, bool originalp)
5863 {
5864   comp_cost best_cost, act_cost;
5865   unsigned i;
5866   bitmap_iterator bi;
5867   struct iv_cand *cand;
5868   struct iv_ca_delta *best_delta = NULL, *act_delta;
5869   struct cost_pair *cp;
5870
5871   iv_ca_add_use (data, ivs, use, false);
5872   best_cost = iv_ca_cost (ivs);
5873
5874   cp = iv_ca_cand_for_use (ivs, use);
5875   if (!cp)
5876     {
5877       ivs->upto--;
5878       ivs->bad_uses--;
5879       iv_ca_add_use (data, ivs, use, true);
5880       best_cost = iv_ca_cost (ivs);
5881       cp = iv_ca_cand_for_use (ivs, use);
5882     }
5883   if (cp)
5884     {
5885       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5886       iv_ca_set_no_cp (data, ivs, use);
5887     }
5888
5889   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5890      first try important candidates not based on any memory object.  Only if
5891      this fails, try the specific ones.  Rationale -- in loops with many
5892      variables the best choice often is to use just one generic biv.  If we
5893      added here many ivs specific to the uses, the optimization algorithm later
5894      would be likely to get stuck in a local minimum, thus causing us to create
5895      too many ivs.  The approach from few ivs to more seems more likely to be
5896      successful -- starting from few ivs, replacing an expensive use by a
5897      specific iv should always be a win.  */
5898   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5899     {
5900       cand = iv_cand (data, i);
5901
5902       if (originalp && cand->pos !=IP_ORIGINAL)
5903         continue;
5904
5905       if (!originalp && cand->iv->base_object != NULL_TREE)
5906         continue;
5907
5908       if (iv_ca_cand_used_p (ivs, cand))
5909         continue;
5910
5911       cp = get_use_iv_cost (data, use, cand);
5912       if (!cp)
5913         continue;
5914
5915       iv_ca_set_cp (data, ivs, use, cp);
5916       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5917                                true);
5918       iv_ca_set_no_cp (data, ivs, use);
5919       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5920
5921       if (compare_costs (act_cost, best_cost) < 0)
5922         {
5923           best_cost = act_cost;
5924
5925           iv_ca_delta_free (&best_delta);
5926           best_delta = act_delta;
5927         }
5928       else
5929         iv_ca_delta_free (&act_delta);
5930     }
5931
5932   if (infinite_cost_p (best_cost))
5933     {
5934       for (i = 0; i < use->n_map_members; i++)
5935         {
5936           cp = use->cost_map + i;
5937           cand = cp->cand;
5938           if (!cand)
5939             continue;
5940
5941           /* Already tried this.  */
5942           if (cand->important)
5943             {
5944               if (originalp && cand->pos == IP_ORIGINAL)
5945                 continue;
5946               if (!originalp && cand->iv->base_object == NULL_TREE)
5947                 continue;
5948             }
5949
5950           if (iv_ca_cand_used_p (ivs, cand))
5951             continue;
5952
5953           act_delta = NULL;
5954           iv_ca_set_cp (data, ivs, use, cp);
5955           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5956           iv_ca_set_no_cp (data, ivs, use);
5957           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5958                                        cp, act_delta);
5959
5960           if (compare_costs (act_cost, best_cost) < 0)
5961             {
5962               best_cost = act_cost;
5963
5964               if (best_delta)
5965                 iv_ca_delta_free (&best_delta);
5966               best_delta = act_delta;
5967             }
5968           else
5969             iv_ca_delta_free (&act_delta);
5970         }
5971     }
5972
5973   iv_ca_delta_commit (data, ivs, best_delta, true);
5974   iv_ca_delta_free (&best_delta);
5975
5976   return !infinite_cost_p (best_cost);
5977 }
5978
5979 /* Finds an initial assignment of candidates to uses.  */
5980
5981 static struct iv_ca *
5982 get_initial_solution (struct ivopts_data *data, bool originalp)
5983 {
5984   struct iv_ca *ivs = iv_ca_new (data);
5985   unsigned i;
5986
5987   for (i = 0; i < n_iv_uses (data); i++)
5988     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5989       {
5990         iv_ca_free (&ivs);
5991         return NULL;
5992       }
5993
5994   return ivs;
5995 }
5996
5997 /* Tries to improve set of induction variables IVS.  */
5998
5999 static bool
6000 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6001 {
6002   unsigned i, n_ivs;
6003   comp_cost acost, best_cost = iv_ca_cost (ivs);
6004   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6005   struct iv_cand *cand;
6006
6007   /* Try extending the set of induction variables by one.  */
6008   for (i = 0; i < n_iv_cands (data); i++)
6009     {
6010       cand = iv_cand (data, i);
6011
6012       if (iv_ca_cand_used_p (ivs, cand))
6013         continue;
6014
6015       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6016       if (!act_delta)
6017         continue;
6018
6019       /* If we successfully added the candidate and the set is small enough,
6020          try optimizing it by removing other candidates.  */
6021       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6022         {
6023           iv_ca_delta_commit (data, ivs, act_delta, true);
6024           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6025           iv_ca_delta_commit (data, ivs, act_delta, false);
6026           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6027         }
6028
6029       if (compare_costs (acost, best_cost) < 0)
6030         {
6031           best_cost = acost;
6032           iv_ca_delta_free (&best_delta);
6033           best_delta = act_delta;
6034         }
6035       else
6036         iv_ca_delta_free (&act_delta);
6037     }
6038
6039   if (!best_delta)
6040     {
6041       /* Try removing the candidates from the set instead.  */
6042       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6043
6044       /* Nothing more we can do.  */
6045       if (!best_delta)
6046         return false;
6047     }
6048
6049   iv_ca_delta_commit (data, ivs, best_delta, true);
6050   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6051   iv_ca_delta_free (&best_delta);
6052   return true;
6053 }
6054
6055 /* Attempts to find the optimal set of induction variables.  We do simple
6056    greedy heuristic -- we try to replace at most one candidate in the selected
6057    solution and remove the unused ivs while this improves the cost.  */
6058
6059 static struct iv_ca *
6060 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6061 {
6062   struct iv_ca *set;
6063
6064   /* Get the initial solution.  */
6065   set = get_initial_solution (data, originalp);
6066   if (!set)
6067     {
6068       if (dump_file && (dump_flags & TDF_DETAILS))
6069         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6070       return NULL;
6071     }
6072
6073   if (dump_file && (dump_flags & TDF_DETAILS))
6074     {
6075       fprintf (dump_file, "Initial set of candidates:\n");
6076       iv_ca_dump (data, dump_file, set);
6077     }
6078
6079   while (try_improve_iv_set (data, set))
6080     {
6081       if (dump_file && (dump_flags & TDF_DETAILS))
6082         {
6083           fprintf (dump_file, "Improved to:\n");
6084           iv_ca_dump (data, dump_file, set);
6085         }
6086     }
6087
6088   return set;
6089 }
6090
6091 static struct iv_ca *
6092 find_optimal_iv_set (struct ivopts_data *data)
6093 {
6094   unsigned i;
6095   struct iv_ca *set, *origset;
6096   struct iv_use *use;
6097   comp_cost cost, origcost;
6098
6099   /* Determine the cost based on a strategy that starts with original IVs,
6100      and try again using a strategy that prefers candidates not based
6101      on any IVs.  */
6102   origset = find_optimal_iv_set_1 (data, true);
6103   set = find_optimal_iv_set_1 (data, false);
6104
6105   if (!origset && !set)
6106     return NULL;
6107
6108   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6109   cost = set ? iv_ca_cost (set) : infinite_cost;
6110
6111   if (dump_file && (dump_flags & TDF_DETAILS))
6112     {
6113       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6114                origcost.cost, origcost.complexity);
6115       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6116                cost.cost, cost.complexity);
6117     }
6118
6119   /* Choose the one with the best cost.  */
6120   if (compare_costs (origcost, cost) <= 0)
6121     {
6122       if (set)
6123         iv_ca_free (&set);
6124       set = origset;
6125     }
6126   else if (origset)
6127     iv_ca_free (&origset);
6128
6129   for (i = 0; i < n_iv_uses (data); i++)
6130     {
6131       use = iv_use (data, i);
6132       use->selected = iv_ca_cand_for_use (set, use)->cand;
6133     }
6134
6135   return set;
6136 }
6137
6138 /* Creates a new induction variable corresponding to CAND.  */
6139
6140 static void
6141 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6142 {
6143   gimple_stmt_iterator incr_pos;
6144   tree base;
6145   bool after = false;
6146
6147   if (!cand->iv)
6148     return;
6149
6150   switch (cand->pos)
6151     {
6152     case IP_NORMAL:
6153       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6154       break;
6155
6156     case IP_END:
6157       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6158       after = true;
6159       break;
6160
6161     case IP_AFTER_USE:
6162       after = true;
6163       /* fall through */
6164     case IP_BEFORE_USE:
6165       incr_pos = gsi_for_stmt (cand->incremented_at);
6166       break;
6167
6168     case IP_ORIGINAL:
6169       /* Mark that the iv is preserved.  */
6170       name_info (data, cand->var_before)->preserve_biv = true;
6171       name_info (data, cand->var_after)->preserve_biv = true;
6172
6173       /* Rewrite the increment so that it uses var_before directly.  */
6174       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6175       return;
6176     }
6177
6178   gimple_add_tmp_var (cand->var_before);
6179
6180   base = unshare_expr (cand->iv->base);
6181
6182   create_iv (base, unshare_expr (cand->iv->step),
6183              cand->var_before, data->current_loop,
6184              &incr_pos, after, &cand->var_before, &cand->var_after);
6185 }
6186
6187 /* Creates new induction variables described in SET.  */
6188
6189 static void
6190 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6191 {
6192   unsigned i;
6193   struct iv_cand *cand;
6194   bitmap_iterator bi;
6195
6196   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6197     {
6198       cand = iv_cand (data, i);
6199       create_new_iv (data, cand);
6200     }
6201
6202   if (dump_file && (dump_flags & TDF_DETAILS))
6203     {
6204       fprintf (dump_file, "\nSelected IV set: \n");
6205       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6206         {
6207           cand = iv_cand (data, i);
6208           dump_cand (dump_file, cand);
6209         }
6210       fprintf (dump_file, "\n");
6211     }
6212 }
6213
6214 /* Rewrites USE (definition of iv used in a nonlinear expression)
6215    using candidate CAND.  */
6216
6217 static void
6218 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6219                             struct iv_use *use, struct iv_cand *cand)
6220 {
6221   tree comp;
6222   tree op, tgt;
6223   gimple ass;
6224   gimple_stmt_iterator bsi;
6225
6226   /* An important special case -- if we are asked to express value of
6227      the original iv by itself, just exit; there is no need to
6228      introduce a new computation (that might also need casting the
6229      variable to unsigned and back).  */
6230   if (cand->pos == IP_ORIGINAL
6231       && cand->incremented_at == use->stmt)
6232     {
6233       enum tree_code stmt_code;
6234
6235       gcc_assert (is_gimple_assign (use->stmt));
6236       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6237
6238       /* Check whether we may leave the computation unchanged.
6239          This is the case only if it does not rely on other
6240          computations in the loop -- otherwise, the computation
6241          we rely upon may be removed in remove_unused_ivs,
6242          thus leading to ICE.  */
6243       stmt_code = gimple_assign_rhs_code (use->stmt);
6244       if (stmt_code == PLUS_EXPR
6245           || stmt_code == MINUS_EXPR
6246           || stmt_code == POINTER_PLUS_EXPR)
6247         {
6248           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6249             op = gimple_assign_rhs2 (use->stmt);
6250           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6251             op = gimple_assign_rhs1 (use->stmt);
6252           else
6253             op = NULL_TREE;
6254         }
6255       else
6256         op = NULL_TREE;
6257
6258       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6259         return;
6260     }
6261
6262   comp = get_computation (data->current_loop, use, cand);
6263   gcc_assert (comp != NULL_TREE);
6264
6265   switch (gimple_code (use->stmt))
6266     {
6267     case GIMPLE_PHI:
6268       tgt = PHI_RESULT (use->stmt);
6269
6270       /* If we should keep the biv, do not replace it.  */
6271       if (name_info (data, tgt)->preserve_biv)
6272         return;
6273
6274       bsi = gsi_after_labels (gimple_bb (use->stmt));
6275       break;
6276
6277     case GIMPLE_ASSIGN:
6278       tgt = gimple_assign_lhs (use->stmt);
6279       bsi = gsi_for_stmt (use->stmt);
6280       break;
6281
6282     default:
6283       gcc_unreachable ();
6284     }
6285
6286   if (!valid_gimple_rhs_p (comp)
6287       || (gimple_code (use->stmt) != GIMPLE_PHI
6288           /* We can't allow re-allocating the stmt as it might be pointed
6289              to still.  */
6290           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6291               >= gimple_num_ops (gsi_stmt (bsi)))))
6292     {
6293       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6294                                        true, GSI_SAME_STMT);
6295       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6296         {
6297           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6298           /* As this isn't a plain copy we have to reset alignment
6299              information.  */
6300           if (SSA_NAME_PTR_INFO (comp))
6301             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6302         }
6303     }
6304
6305   if (gimple_code (use->stmt) == GIMPLE_PHI)
6306     {
6307       ass = gimple_build_assign (tgt, comp);
6308       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6309
6310       bsi = gsi_for_stmt (use->stmt);
6311       remove_phi_node (&bsi, false);
6312     }
6313   else
6314     {
6315       gimple_assign_set_rhs_from_tree (&bsi, comp);
6316       use->stmt = gsi_stmt (bsi);
6317     }
6318 }
6319
6320 /* Performs a peephole optimization to reorder the iv update statement with
6321    a mem ref to enable instruction combining in later phases. The mem ref uses
6322    the iv value before the update, so the reordering transformation requires
6323    adjustment of the offset. CAND is the selected IV_CAND.
6324
6325    Example:
6326
6327    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6328    iv2 = iv1 + 1;
6329
6330    if (t < val)      (1)
6331      goto L;
6332    goto Head;
6333
6334
6335    directly propagating t over to (1) will introduce overlapping live range
6336    thus increase register pressure. This peephole transform it into:
6337
6338
6339    iv2 = iv1 + 1;
6340    t = MEM_REF (base, iv2, 8, 8);
6341    if (t < val)
6342      goto L;
6343    goto Head;
6344 */
6345
6346 static void
6347 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6348 {
6349   tree var_after;
6350   gimple iv_update, stmt;
6351   basic_block bb;
6352   gimple_stmt_iterator gsi, gsi_iv;
6353
6354   if (cand->pos != IP_NORMAL)
6355     return;
6356
6357   var_after = cand->var_after;
6358   iv_update = SSA_NAME_DEF_STMT (var_after);
6359
6360   bb = gimple_bb (iv_update);
6361   gsi = gsi_last_nondebug_bb (bb);
6362   stmt = gsi_stmt (gsi);
6363
6364   /* Only handle conditional statement for now.  */
6365   if (gimple_code (stmt) != GIMPLE_COND)
6366     return;
6367
6368   gsi_prev_nondebug (&gsi);
6369   stmt = gsi_stmt (gsi);
6370   if (stmt != iv_update)
6371     return;
6372
6373   gsi_prev_nondebug (&gsi);
6374   if (gsi_end_p (gsi))
6375     return;
6376
6377   stmt = gsi_stmt (gsi);
6378   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6379     return;
6380
6381   if (stmt != use->stmt)
6382     return;
6383
6384   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6385     return;
6386
6387   if (dump_file && (dump_flags & TDF_DETAILS))
6388     {
6389       fprintf (dump_file, "Reordering \n");
6390       print_gimple_stmt (dump_file, iv_update, 0, 0);
6391       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6392       fprintf (dump_file, "\n");
6393     }
6394
6395   gsi = gsi_for_stmt (use->stmt);
6396   gsi_iv = gsi_for_stmt (iv_update);
6397   gsi_move_before (&gsi_iv, &gsi);
6398
6399   cand->pos = IP_BEFORE_USE;
6400   cand->incremented_at = use->stmt;
6401 }
6402
6403 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6404
6405 static void
6406 rewrite_use_address (struct ivopts_data *data,
6407                      struct iv_use *use, struct iv_cand *cand)
6408 {
6409   aff_tree aff;
6410   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6411   tree base_hint = NULL_TREE;
6412   tree ref, iv;
6413   bool ok;
6414
6415   adjust_iv_update_pos (cand, use);
6416   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6417   gcc_assert (ok);
6418   unshare_aff_combination (&aff);
6419
6420   /* To avoid undefined overflow problems, all IV candidates use unsigned
6421      integer types.  The drawback is that this makes it impossible for
6422      create_mem_ref to distinguish an IV that is based on a memory object
6423      from one that represents simply an offset.
6424
6425      To work around this problem, we pass a hint to create_mem_ref that
6426      indicates which variable (if any) in aff is an IV based on a memory
6427      object.  Note that we only consider the candidate.  If this is not
6428      based on an object, the base of the reference is in some subexpression
6429      of the use -- but these will use pointer types, so they are recognized
6430      by the create_mem_ref heuristics anyway.  */
6431   if (cand->iv->base_object)
6432     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6433
6434   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6435   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6436                         reference_alias_ptr_type (*use->op_p),
6437                         iv, base_hint, data->speed);
6438   copy_ref_info (ref, *use->op_p);
6439   *use->op_p = ref;
6440 }
6441
6442 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6443    candidate CAND.  */
6444
6445 static void
6446 rewrite_use_compare (struct ivopts_data *data,
6447                      struct iv_use *use, struct iv_cand *cand)
6448 {
6449   tree comp, *var_p, op, bound;
6450   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6451   enum tree_code compare;
6452   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6453   bool ok;
6454
6455   bound = cp->value;
6456   if (bound)
6457     {
6458       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6459       tree var_type = TREE_TYPE (var);
6460       gimple_seq stmts;
6461
6462       if (dump_file && (dump_flags & TDF_DETAILS))
6463         {
6464           fprintf (dump_file, "Replacing exit test: ");
6465           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6466         }
6467       compare = cp->comp;
6468       bound = unshare_expr (fold_convert (var_type, bound));
6469       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6470       if (stmts)
6471         gsi_insert_seq_on_edge_immediate (
6472                 loop_preheader_edge (data->current_loop),
6473                 stmts);
6474
6475       gimple_cond_set_lhs (use->stmt, var);
6476       gimple_cond_set_code (use->stmt, compare);
6477       gimple_cond_set_rhs (use->stmt, op);
6478       return;
6479     }
6480
6481   /* The induction variable elimination failed; just express the original
6482      giv.  */
6483   comp = get_computation (data->current_loop, use, cand);
6484   gcc_assert (comp != NULL_TREE);
6485
6486   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6487   gcc_assert (ok);
6488
6489   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6490                                      true, GSI_SAME_STMT);
6491 }
6492
6493 /* Rewrites USE using candidate CAND.  */
6494
6495 static void
6496 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6497 {
6498   switch (use->type)
6499     {
6500       case USE_NONLINEAR_EXPR:
6501         rewrite_use_nonlinear_expr (data, use, cand);
6502         break;
6503
6504       case USE_ADDRESS:
6505         rewrite_use_address (data, use, cand);
6506         break;
6507
6508       case USE_COMPARE:
6509         rewrite_use_compare (data, use, cand);
6510         break;
6511
6512       default:
6513         gcc_unreachable ();
6514     }
6515
6516   update_stmt (use->stmt);
6517 }
6518
6519 /* Rewrite the uses using the selected induction variables.  */
6520
6521 static void
6522 rewrite_uses (struct ivopts_data *data)
6523 {
6524   unsigned i;
6525   struct iv_cand *cand;
6526   struct iv_use *use;
6527
6528   for (i = 0; i < n_iv_uses (data); i++)
6529     {
6530       use = iv_use (data, i);
6531       cand = use->selected;
6532       gcc_assert (cand);
6533
6534       rewrite_use (data, use, cand);
6535     }
6536 }
6537
6538 /* Removes the ivs that are not used after rewriting.  */
6539
6540 static void
6541 remove_unused_ivs (struct ivopts_data *data)
6542 {
6543   unsigned j;
6544   bitmap_iterator bi;
6545   bitmap toremove = BITMAP_ALLOC (NULL);
6546
6547   /* Figure out an order in which to release SSA DEFs so that we don't
6548      release something that we'd have to propagate into a debug stmt
6549      afterwards.  */
6550   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6551     {
6552       struct version_info *info;
6553
6554       info = ver_info (data, j);
6555       if (info->iv
6556           && !integer_zerop (info->iv->step)
6557           && !info->inv_id
6558           && !info->iv->have_use_for
6559           && !info->preserve_biv)
6560         {
6561           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6562
6563           tree def = info->iv->ssa_name;
6564
6565           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6566             {
6567               imm_use_iterator imm_iter;
6568               use_operand_p use_p;
6569               gimple stmt;
6570               int count = 0;
6571
6572               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6573                 {
6574                   if (!gimple_debug_bind_p (stmt))
6575                     continue;
6576
6577                   /* We just want to determine whether to do nothing
6578                      (count == 0), to substitute the computed
6579                      expression into a single use of the SSA DEF by
6580                      itself (count == 1), or to use a debug temp
6581                      because the SSA DEF is used multiple times or as
6582                      part of a larger expression (count > 1). */
6583                   count++;
6584                   if (gimple_debug_bind_get_value (stmt) != def)
6585                     count++;
6586
6587                   if (count > 1)
6588                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6589                 }
6590
6591               if (!count)
6592                 continue;
6593
6594               struct iv_use dummy_use;
6595               struct iv_cand *best_cand = NULL, *cand;
6596               unsigned i, best_pref = 0, cand_pref;
6597
6598               memset (&dummy_use, 0, sizeof (dummy_use));
6599               dummy_use.iv = info->iv;
6600               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6601                 {
6602                   cand = iv_use (data, i)->selected;
6603                   if (cand == best_cand)
6604                     continue;
6605                   cand_pref = operand_equal_p (cand->iv->step,
6606                                                info->iv->step, 0)
6607                     ? 4 : 0;
6608                   cand_pref
6609                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6610                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6611                     ? 2 : 0;
6612                   cand_pref
6613                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6614                     ? 1 : 0;
6615                   if (best_cand == NULL || best_pref < cand_pref)
6616                     {
6617                       best_cand = cand;
6618                       best_pref = cand_pref;
6619                     }
6620                 }
6621
6622               if (!best_cand)
6623                 continue;
6624
6625               tree comp = get_computation_at (data->current_loop,
6626                                               &dummy_use, best_cand,
6627                                               SSA_NAME_DEF_STMT (def));
6628               if (!comp)
6629                 continue;
6630
6631               if (count > 1)
6632                 {
6633                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6634                   DECL_ARTIFICIAL (vexpr) = 1;
6635                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6636                   if (SSA_NAME_VAR (def))
6637                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6638                   else
6639                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6640                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6641                   gimple_stmt_iterator gsi;
6642
6643                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6644                     gsi = gsi_after_labels (gimple_bb
6645                                             (SSA_NAME_DEF_STMT (def)));
6646                   else
6647                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6648
6649                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6650                   comp = vexpr;
6651                 }
6652
6653               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6654                 {
6655                   if (!gimple_debug_bind_p (stmt))
6656                     continue;
6657
6658                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6659                     SET_USE (use_p, comp);
6660
6661                   update_stmt (stmt);
6662                 }
6663             }
6664         }
6665     }
6666
6667   release_defs_bitset (toremove);
6668
6669   BITMAP_FREE (toremove);
6670 }
6671
6672 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6673    for pointer_map_traverse.  */
6674
6675 static bool
6676 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6677                       void *data ATTRIBUTE_UNUSED)
6678 {
6679   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6680
6681   free (niter);
6682   return true;
6683 }
6684
6685 /* Frees data allocated by the optimization of a single loop.  */
6686
6687 static void
6688 free_loop_data (struct ivopts_data *data)
6689 {
6690   unsigned i, j;
6691   bitmap_iterator bi;
6692   tree obj;
6693
6694   if (data->niters)
6695     {
6696       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6697       pointer_map_destroy (data->niters);
6698       data->niters = NULL;
6699     }
6700
6701   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6702     {
6703       struct version_info *info;
6704
6705       info = ver_info (data, i);
6706       free (info->iv);
6707       info->iv = NULL;
6708       info->has_nonlin_use = false;
6709       info->preserve_biv = false;
6710       info->inv_id = 0;
6711     }
6712   bitmap_clear (data->relevant);
6713   bitmap_clear (data->important_candidates);
6714
6715   for (i = 0; i < n_iv_uses (data); i++)
6716     {
6717       struct iv_use *use = iv_use (data, i);
6718
6719       free (use->iv);
6720       BITMAP_FREE (use->related_cands);
6721       for (j = 0; j < use->n_map_members; j++)
6722         if (use->cost_map[j].depends_on)
6723           BITMAP_FREE (use->cost_map[j].depends_on);
6724       free (use->cost_map);
6725       free (use);
6726     }
6727   data->iv_uses.truncate (0);
6728
6729   for (i = 0; i < n_iv_cands (data); i++)
6730     {
6731       struct iv_cand *cand = iv_cand (data, i);
6732
6733       free (cand->iv);
6734       if (cand->depends_on)
6735         BITMAP_FREE (cand->depends_on);
6736       free (cand);
6737     }
6738   data->iv_candidates.truncate (0);
6739
6740   if (data->version_info_size < num_ssa_names)
6741     {
6742       data->version_info_size = 2 * num_ssa_names;
6743       free (data->version_info);
6744       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6745     }
6746
6747   data->max_inv_id = 0;
6748
6749   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6750     SET_DECL_RTL (obj, NULL_RTX);
6751
6752   decl_rtl_to_reset.truncate (0);
6753
6754   data->inv_expr_tab.empty ();
6755   data->inv_expr_id = 0;
6756 }
6757
6758 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6759    loop tree.  */
6760
6761 static void
6762 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6763 {
6764   free_loop_data (data);
6765   free (data->version_info);
6766   BITMAP_FREE (data->relevant);
6767   BITMAP_FREE (data->important_candidates);
6768
6769   decl_rtl_to_reset.release ();
6770   data->iv_uses.release ();
6771   data->iv_candidates.release ();
6772   data->inv_expr_tab.dispose ();
6773 }
6774
6775 /* Returns true if the loop body BODY includes any function calls.  */
6776
6777 static bool
6778 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6779 {
6780   gimple_stmt_iterator gsi;
6781   unsigned i;
6782
6783   for (i = 0; i < num_nodes; i++)
6784     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6785       {
6786         gimple stmt = gsi_stmt (gsi);
6787         if (is_gimple_call (stmt)
6788             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6789           return true;
6790       }
6791   return false;
6792 }
6793
6794 /* Optimizes the LOOP.  Returns true if anything changed.  */
6795
6796 static bool
6797 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6798 {
6799   bool changed = false;
6800   struct iv_ca *iv_ca;
6801   edge exit = single_dom_exit (loop);
6802   basic_block *body;
6803
6804   gcc_assert (!data->niters);
6805   data->current_loop = loop;
6806   data->speed = optimize_loop_for_speed_p (loop);
6807
6808   if (dump_file && (dump_flags & TDF_DETAILS))
6809     {
6810       fprintf (dump_file, "Processing loop %d\n", loop->num);
6811
6812       if (exit)
6813         {
6814           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6815                    exit->src->index, exit->dest->index);
6816           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6817           fprintf (dump_file, "\n");
6818         }
6819
6820       fprintf (dump_file, "\n");
6821     }
6822
6823   body = get_loop_body (loop);
6824   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6825   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6826   free (body);
6827
6828   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6829
6830   /* For each ssa name determines whether it behaves as an induction variable
6831      in some loop.  */
6832   if (!find_induction_variables (data))
6833     goto finish;
6834
6835   /* Finds interesting uses (item 1).  */
6836   find_interesting_uses (data);
6837   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6838     goto finish;
6839
6840   /* Finds candidates for the induction variables (item 2).  */
6841   find_iv_candidates (data);
6842
6843   /* Calculates the costs (item 3, part 1).  */
6844   determine_iv_costs (data);
6845   determine_use_iv_costs (data);
6846   determine_set_costs (data);
6847
6848   /* Find the optimal set of induction variables (item 3, part 2).  */
6849   iv_ca = find_optimal_iv_set (data);
6850   if (!iv_ca)
6851     goto finish;
6852   changed = true;
6853
6854   /* Create the new induction variables (item 4, part 1).  */
6855   create_new_ivs (data, iv_ca);
6856   iv_ca_free (&iv_ca);
6857
6858   /* Rewrite the uses (item 4, part 2).  */
6859   rewrite_uses (data);
6860
6861   /* Remove the ivs that are unused after rewriting.  */
6862   remove_unused_ivs (data);
6863
6864   /* We have changed the structure of induction variables; it might happen
6865      that definitions in the scev database refer to some of them that were
6866      eliminated.  */
6867   scev_reset ();
6868
6869 finish:
6870   free_loop_data (data);
6871
6872   return changed;
6873 }
6874
6875 /* Main entry point.  Optimizes induction variables in loops.  */
6876
6877 void
6878 tree_ssa_iv_optimize (void)
6879 {
6880   struct loop *loop;
6881   struct ivopts_data data;
6882
6883   tree_ssa_iv_optimize_init (&data);
6884
6885   /* Optimize the loops starting with the innermost ones.  */
6886   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6887     {
6888       if (dump_file && (dump_flags & TDF_DETAILS))
6889         flow_loop_dump (loop, dump_file, NULL, 1);
6890
6891       tree_ssa_iv_optimize_loop (&data, loop);
6892     }
6893
6894   tree_ssa_iv_optimize_finalize (&data);
6895 }