gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "tm_p.h"
  70 #include "basic-block.h"
  71 #include "gimple-pretty-print.h"
  72 #include "tree-flow.h"
  73 #include "cfgloop.h"
  74 #include "tree-pass.h"
  75 #include "ggc.h"
  76 #include "insn-config.h"
  77 #include "pointer-set.h"
  78 #include "hash-table.h"
  79 #include "tree-chrec.h"
  80 #include "tree-scalar-evolution.h"
  81 #include "cfgloop.h"
  82 #include "params.h"
  83 #include "langhooks.h"
  84 #include "tree-affine.h"
  85 #include "target.h"
  86 #include "tree-inline.h"
  87 #include "tree-ssa-propagate.h"
  88 #include "expmed.h"
  89
  90 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  91    cost of different addressing modes.  This should be moved to a TBD
  92    interface between the GIMPLE and RTL worlds.  */
  93 #include "expr.h"
  94 #include "recog.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101 /* Returns the expected number of loop iterations for LOOP.
 102    The average trip count is computed from profile data if it
 103    exists. */
 104
 105 static inline HOST_WIDE_INT
 106 avg_loop_niter (struct loop *loop)
 107 {
 108   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 109   if (niter == -1)
 110     return AVG_LOOP_NITER (loop);
 111
 112   return niter;
 113 }
 114
 115 /* Representation of the induction variable.  */
 116 struct iv
 117 {
 118   tree base;            /* Initial value of the iv.  */
 119   tree base_object;     /* A memory object to that the induction variable points.  */
 120   tree step;            /* Step of the iv (constant only).  */
 121   tree ssa_name;        /* The ssa name with the value.  */
 122   bool biv_p;           /* Is it a biv?  */
 123   bool have_use_for;    /* Do we already have a use for it?  */
 124   unsigned use_id;      /* The identifier in the use if it is the case.  */
 125 };
 126
 127 /* Per-ssa version information (induction variable descriptions, etc.).  */
 128 struct version_info
 129 {
 130   tree name;            /* The ssa name.  */
 131   struct iv *iv;        /* Induction variable description.  */
 132   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 133                            an expression that is not an induction variable.  */
 134   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 135   unsigned inv_id;      /* Id of an invariant.  */
 136 };
 137
 138 /* Types of uses.  */
 139 enum use_type
 140 {
 141   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 142   USE_ADDRESS,          /* Use in an address.  */
 143   USE_COMPARE           /* Use is a compare.  */
 144 };
 145
 146 /* Cost of a computation.  */
 147 typedef struct
 148 {
 149   int cost;             /* The runtime cost.  */
 150   unsigned complexity;  /* The estimate of the complexity of the code for
 151                            the computation (in no concrete units --
 152                            complexity field should be larger for more
 153                            complex expressions and addressing modes).  */
 154 } comp_cost;
 155
 156 static const comp_cost no_cost = {0, 0};
 157 static const comp_cost infinite_cost = {INFTY, INFTY};
 158
 159 /* The candidate - cost pair.  */
 160 struct cost_pair
 161 {
 162   struct iv_cand *cand; /* The candidate.  */
 163   comp_cost cost;       /* The cost.  */
 164   bitmap depends_on;    /* The list of invariants that have to be
 165                            preserved.  */
 166   tree value;           /* For final value elimination, the expression for
 167                            the final value of the iv.  For iv elimination,
 168                            the new bound to compare with.  */
 169   enum tree_code comp;  /* For iv elimination, the comparison.  */
 170   int inv_expr_id;      /* Loop invariant expression id.  */
 171 };
 172
 173 /* Use.  */
 174 struct iv_use
 175 {
 176   unsigned id;          /* The id of the use.  */
 177   enum use_type type;   /* Type of the use.  */
 178   struct iv *iv;        /* The induction variable it is based on.  */
 179   gimple stmt;          /* Statement in that it occurs.  */
 180   tree *op_p;           /* The place where it occurs.  */
 181   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 182                            important ones.  */
 183
 184   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 185   struct cost_pair *cost_map;
 186                         /* The costs wrto the iv candidates.  */
 187
 188   struct iv_cand *selected;
 189                         /* The selected candidate.  */
 190 };
 191
 192 /* The position where the iv is computed.  */
 193 enum iv_position
 194 {
 195   IP_NORMAL,            /* At the end, just before the exit condition.  */
 196   IP_END,               /* At the end of the latch block.  */
 197   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 198   IP_AFTER_USE,         /* Immediately after a specific use.  */
 199   IP_ORIGINAL           /* The original biv.  */
 200 };
 201
 202 /* The induction variable candidate.  */
 203 struct iv_cand
 204 {
 205   unsigned id;          /* The number of the candidate.  */
 206   bool important;       /* Whether this is an "important" candidate, i.e. such
 207                            that it should be considered by all uses.  */
 208   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 209   gimple incremented_at;/* For original biv, the statement where it is
 210                            incremented.  */
 211   tree var_before;      /* The variable used for it before increment.  */
 212   tree var_after;       /* The variable used for it after increment.  */
 213   struct iv *iv;        /* The value of the candidate.  NULL for
 214                            "pseudocandidate" used to indicate the possibility
 215                            to replace the final value of an iv by direct
 216                            computation of the value.  */
 217   unsigned cost;        /* Cost of the candidate.  */
 218   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 219   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 220                               where it is incremented.  */
 221   bitmap depends_on;    /* The list of invariants that are used in step of the
 222                            biv.  */
 223 };
 224
 225 /* Loop invariant expression hashtable entry.  */
 226 struct iv_inv_expr_ent
 227 {
 228   tree expr;
 229   int id;
 230   hashval_t hash;
 231 };
 232
/* Pointer typedefs for uses and candidates.  They are the element types of
   the vectors kept in struct ivopts_data (the data used by the induction
   variable optimizations), which is defined below.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
 238
 239 /* Hashtable helpers.  */
 240
 241 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 242 {
 243   typedef iv_inv_expr_ent value_type;
 244   typedef iv_inv_expr_ent compare_type;
 245   static inline hashval_t hash (const value_type *);
 246   static inline bool equal (const value_type *, const compare_type *);
 247 };
 248
 249 /* Hash function for loop invariant expressions.  */
 250
 251 inline hashval_t
 252 iv_inv_expr_hasher::hash (const value_type *expr)
 253 {
 254   return expr->hash;
 255 }
 256
 257 /* Hash table equality function for expressions.  */
 258
 259 inline bool
 260 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 261 {
 262   return expr1->hash == expr2->hash
 263          && operand_equal_p (expr1->expr, expr2->expr, 0);
 264 }
 265
/* The data used by the induction variable optimizations.  */

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  The map is
     keyed by exit edge and is created on demand by niter_for_exit.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names, indexed by ssa version.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether all candidates are considered when replacing a use.
     NOTE(review): the previous comment read "whether to consider just
     related and important candidates", which is the opposite of what the
     field name says; presumably that describes the case when this flag is
     false -- confirm against the code that reads the flag.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
 318
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop invariant
     expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
 363
/* Difference of two iv candidate assignments.  It is one element of a
   singly linked list of changes, chained through next_change.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
 380
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences than this, we just give up (it is quite
   unlikely that optimizing such a loop would help, and it would take
   ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 397
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; the definition is not in this part of the file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
 404
 405 /* Number of uses recorded in DATA.  */
 406
 407 static inline unsigned
 408 n_iv_uses (struct ivopts_data *data)
 409 {
 410   return data->iv_uses.length ();
 411 }
 412
 413 /* Ith use recorded in DATA.  */
 414
 415 static inline struct iv_use *
 416 iv_use (struct ivopts_data *data, unsigned i)
 417 {
 418   return data->iv_uses[i];
 419 }
 420
 421 /* Number of candidates recorded in DATA.  */
 422
 423 static inline unsigned
 424 n_iv_cands (struct ivopts_data *data)
 425 {
 426   return data->iv_candidates.length ();
 427 }
 428
 429 /* Ith candidate recorded in DATA.  */
 430
 431 static inline struct iv_cand *
 432 iv_cand (struct ivopts_data *data, unsigned i)
 433 {
 434   return data->iv_candidates[i];
 435 }
 436
 437 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 438
 439 edge
 440 single_dom_exit (struct loop *loop)
 441 {
 442   edge exit = single_exit (loop);
 443
 444   if (!exit)
 445     return NULL;
 446
 447   if (!just_once_each_iteration_p (loop, exit->src))
 448     return NULL;
 449
 450   return exit;
 451 }
 452
 453 /* Dumps information about the induction variable IV to FILE.  */
 454
 455 extern void dump_iv (FILE *, struct iv *);
 456 void
 457 dump_iv (FILE *file, struct iv *iv)
 458 {
 459   if (iv->ssa_name)
 460     {
 461       fprintf (file, "ssa name ");
 462       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 463       fprintf (file, "\n");
 464     }
 465
 466   fprintf (file, "  type ");
 467   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 468   fprintf (file, "\n");
 469
 470   if (iv->step)
 471     {
 472       fprintf (file, "  base ");
 473       print_generic_expr (file, iv->base, TDF_SLIM);
 474       fprintf (file, "\n");
 475
 476       fprintf (file, "  step ");
 477       print_generic_expr (file, iv->step, TDF_SLIM);
 478       fprintf (file, "\n");
 479     }
 480   else
 481     {
 482       fprintf (file, "  invariant ");
 483       print_generic_expr (file, iv->base, TDF_SLIM);
 484       fprintf (file, "\n");
 485     }
 486
 487   if (iv->base_object)
 488     {
 489       fprintf (file, "  base object ");
 490       print_generic_expr (file, iv->base_object, TDF_SLIM);
 491       fprintf (file, "\n");
 492     }
 493
 494   if (iv->biv_p)
 495     fprintf (file, "  is a biv\n");
 496 }
 497
 498 /* Dumps information about the USE to FILE.  */
 499
 500 extern void dump_use (FILE *, struct iv_use *);
 501 void
 502 dump_use (FILE *file, struct iv_use *use)
 503 {
 504   fprintf (file, "use %d\n", use->id);
 505
 506   switch (use->type)
 507     {
 508     case USE_NONLINEAR_EXPR:
 509       fprintf (file, "  generic\n");
 510       break;
 511
 512     case USE_ADDRESS:
 513       fprintf (file, "  address\n");
 514       break;
 515
 516     case USE_COMPARE:
 517       fprintf (file, "  compare\n");
 518       break;
 519
 520     default:
 521       gcc_unreachable ();
 522     }
 523
 524   fprintf (file, "  in statement ");
 525   print_gimple_stmt (file, use->stmt, 0, 0);
 526   fprintf (file, "\n");
 527
 528   fprintf (file, "  at position ");
 529   if (use->op_p)
 530     print_generic_expr (file, *use->op_p, TDF_SLIM);
 531   fprintf (file, "\n");
 532
 533   dump_iv (file, use->iv);
 534
 535   if (use->related_cands)
 536     {
 537       fprintf (file, "  related candidates ");
 538       dump_bitmap (file, use->related_cands);
 539     }
 540 }
 541
 542 /* Dumps information about the uses to FILE.  */
 543
 544 extern void dump_uses (FILE *, struct ivopts_data *);
 545 void
 546 dump_uses (FILE *file, struct ivopts_data *data)
 547 {
 548   unsigned i;
 549   struct iv_use *use;
 550
 551   for (i = 0; i < n_iv_uses (data); i++)
 552     {
 553       use = iv_use (data, i);
 554
 555       dump_use (file, use);
 556       fprintf (file, "\n");
 557     }
 558 }
 559
 560 /* Dumps information about induction variable candidate CAND to FILE.  */
 561
 562 extern void dump_cand (FILE *, struct iv_cand *);
 563 void
 564 dump_cand (FILE *file, struct iv_cand *cand)
 565 {
 566   struct iv *iv = cand->iv;
 567
 568   fprintf (file, "candidate %d%s\n",
 569            cand->id, cand->important ? " (important)" : "");
 570
 571   if (cand->depends_on)
 572     {
 573       fprintf (file, "  depends on ");
 574       dump_bitmap (file, cand->depends_on);
 575     }
 576
 577   if (!iv)
 578     {
 579       fprintf (file, "  final value replacement\n");
 580       return;
 581     }
 582
 583   if (cand->var_before)
 584     {
 585       fprintf (file, "  var_before ");
 586       print_generic_expr (file, cand->var_before, TDF_SLIM);
 587       fprintf (file, "\n");
 588     }
 589   if (cand->var_after)
 590     {
 591       fprintf (file, "  var_after ");
 592       print_generic_expr (file, cand->var_after, TDF_SLIM);
 593       fprintf (file, "\n");
 594     }
 595
 596   switch (cand->pos)
 597     {
 598     case IP_NORMAL:
 599       fprintf (file, "  incremented before exit test\n");
 600       break;
 601
 602     case IP_BEFORE_USE:
 603       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 604       break;
 605
 606     case IP_AFTER_USE:
 607       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 608       break;
 609
 610     case IP_END:
 611       fprintf (file, "  incremented at end\n");
 612       break;
 613
 614     case IP_ORIGINAL:
 615       fprintf (file, "  original biv\n");
 616       break;
 617     }
 618
 619   dump_iv (file, iv);
 620 }
 621
 622 /* Returns the info for ssa version VER.  */
 623
 624 static inline struct version_info *
 625 ver_info (struct ivopts_data *data, unsigned ver)
 626 {
 627   return data->version_info + ver;
 628 }
 629
 630 /* Returns the info for ssa name NAME.  */
 631
 632 static inline struct version_info *
 633 name_info (struct ivopts_data *data, tree name)
 634 {
 635   return ver_info (data, SSA_NAME_VERSION (name));
 636 }
 637
 638 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 639    emitted in LOOP.  */
 640
 641 static bool
 642 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 643 {
 644   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 645
 646   gcc_assert (bb);
 647
 648   if (sbb == loop->latch)
 649     return true;
 650
 651   if (sbb != bb)
 652     return false;
 653
 654   return stmt == last_stmt (bb);
 655 }
 656
 657 /* Returns true if STMT if after the place where the original induction
 658    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 659    if the positions are identical.  */
 660
 661 static bool
 662 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 663 {
 664   basic_block cand_bb = gimple_bb (cand->incremented_at);
 665   basic_block stmt_bb = gimple_bb (stmt);
 666
 667   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 668     return false;
 669
 670   if (stmt_bb != cand_bb)
 671     return true;
 672
 673   if (true_if_equal
 674       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 675     return true;
 676   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 677 }
 678
 679 /* Returns true if STMT if after the place where the induction variable
 680    CAND is incremented in LOOP.  */
 681
 682 static bool
 683 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 684 {
 685   switch (cand->pos)
 686     {
 687     case IP_END:
 688       return false;
 689
 690     case IP_NORMAL:
 691       return stmt_after_ip_normal_pos (loop, stmt);
 692
 693     case IP_ORIGINAL:
 694     case IP_AFTER_USE:
 695       return stmt_after_inc_pos (cand, stmt, false);
 696
 697     case IP_BEFORE_USE:
 698       return stmt_after_inc_pos (cand, stmt, true);
 699
 700     default:
 701       gcc_unreachable ();
 702     }
 703 }
 704
 705 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 706
 707 static bool
 708 abnormal_ssa_name_p (tree exp)
 709 {
 710   if (!exp)
 711     return false;
 712
 713   if (TREE_CODE (exp) != SSA_NAME)
 714     return false;
 715
 716   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 717 }
 718
 719 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 720    abnormal phi node.  Callback for for_each_index.  */
 721
 722 static bool
 723 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 724                                   void *data ATTRIBUTE_UNUSED)
 725 {
 726   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 727     {
 728       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 729         return false;
 730       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 731         return false;
 732     }
 733
 734   return !abnormal_ssa_name_p (*index);
 735 }
 736
 737 /* Returns true if EXPR contains a ssa name that occurs in an
 738    abnormal phi node.  */
 739
 740 bool
 741 contains_abnormal_ssa_name_p (tree expr)
 742 {
 743   enum tree_code code;
 744   enum tree_code_class codeclass;
 745
 746   if (!expr)
 747     return false;
 748
 749   code = TREE_CODE (expr);
 750   codeclass = TREE_CODE_CLASS (code);
 751
 752   if (code == SSA_NAME)
 753     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 754
 755   if (code == INTEGER_CST
 756       || is_gimple_min_invariant (expr))
 757     return false;
 758
 759   if (code == ADDR_EXPR)
 760     return !for_each_index (&TREE_OPERAND (expr, 0),
 761                             idx_contains_abnormal_ssa_name_p,
 762                             NULL);
 763
 764   if (code == COND_EXPR)
 765     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 766       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 767       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 768
 769   switch (codeclass)
 770     {
 771     case tcc_binary:
 772     case tcc_comparison:
 773       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 774         return true;
 775
 776       /* Fallthru.  */
 777     case tcc_unary:
 778       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 779         return true;
 780
 781       break;
 782
 783     default:
 784       gcc_unreachable ();
 785     }
 786
 787   return false;
 788 }
 789
 790 /*  Returns the structure describing number of iterations determined from
 791     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 792
 793 static struct tree_niter_desc *
 794 niter_for_exit (struct ivopts_data *data, edge exit)
 795 {
 796   struct tree_niter_desc *desc;
 797   void **slot;
 798
 799   if (!data->niters)
 800     {
 801       data->niters = pointer_map_create ();
 802       slot = NULL;
 803     }
 804   else
 805     slot = pointer_map_contains (data->niters, exit);
 806
 807   if (!slot)
 808     {
 809       /* Try to determine number of iterations.  We cannot safely work with ssa
 810          names that appear in phi nodes on abnormal edges, so that we do not
 811          create overlapping life ranges for them (PR 27283).  */
 812       desc = XNEW (struct tree_niter_desc);
 813       if (!number_of_iterations_exit (data->current_loop,
 814                                       exit, desc, true)
 815           || contains_abnormal_ssa_name_p (desc->niter))
 816         {
 817           XDELETE (desc);
 818           desc = NULL;
 819         }
 820       slot = pointer_map_insert (data->niters, exit);
 821       *slot = desc;
 822     }
 823   else
 824     desc = (struct tree_niter_desc *) *slot;
 825
 826   return desc;
 827 }
 828
 829 /* Returns the structure describing number of iterations determined from
 830    single dominating exit of DATA->current_loop, or NULL if something
 831    goes wrong.  */
 832
 833 static struct tree_niter_desc *
 834 niter_for_single_dom_exit (struct ivopts_data *data)
 835 {
 836   edge exit = single_dom_exit (data->current_loop);
 837
 838   if (!exit)
 839     return NULL;
 840
 841   return niter_for_exit (data, exit);
 842 }
 843
 844 /* Initializes data structures used by the iv optimization pass, stored
 845    in DATA.  */
 846
 847 static void
 848 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 849 {
 850   data->version_info_size = 2 * num_ssa_names;
 851   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 852   data->relevant = BITMAP_ALLOC (NULL);
 853   data->important_candidates = BITMAP_ALLOC (NULL);
 854   data->max_inv_id = 0;
 855   data->niters = NULL;
 856   data->iv_uses.create (20);
 857   data->iv_candidates.create (20);
 858   data->inv_expr_tab.create (10);
 859   data->inv_expr_id = 0;
 860   decl_rtl_to_reset.create (20);
 861 }
 862
 863 /* Returns a memory object to that EXPR points.  In case we are able to
 864    determine that it does not point to any such object, NULL is returned.  */
 865
 866 static tree
 867 determine_base_object (tree expr)
 868 {
 869   enum tree_code code = TREE_CODE (expr);
 870   tree base, obj;
 871
 872   /* If this is a pointer casted to any type, we need to determine
 873      the base object for the pointer; so handle conversions before
 874      throwing away non-pointer expressions.  */
 875   if (CONVERT_EXPR_P (expr))
 876     return determine_base_object (TREE_OPERAND (expr, 0));
 877
 878   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 879     return NULL_TREE;
 880
 881   switch (code)
 882     {
 883     case INTEGER_CST:
 884       return NULL_TREE;
 885
 886     case ADDR_EXPR:
 887       obj = TREE_OPERAND (expr, 0);
 888       base = get_base_address (obj);
 889
 890       if (!base)
 891         return expr;
 892
 893       if (TREE_CODE (base) == MEM_REF)
 894         return determine_base_object (TREE_OPERAND (base, 0));
 895
 896       return fold_convert (ptr_type_node,
 897                            build_fold_addr_expr (base));
 898
 899     case POINTER_PLUS_EXPR:
 900       return determine_base_object (TREE_OPERAND (expr, 0));
 901
 902     case PLUS_EXPR:
 903     case MINUS_EXPR:
 904       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 905       gcc_unreachable ();
 906
 907     default:
 908       return fold_convert (ptr_type_node, expr);
 909     }
 910 }
 911
 912 /* Allocates an induction variable with given initial value BASE and step STEP
 913    for loop LOOP.  */
 914
 915 static struct iv *
 916 alloc_iv (tree base, tree step)
 917 {
 918   struct iv *iv = XCNEW (struct iv);
 919   gcc_assert (step != NULL_TREE);
 920
 921   iv->base = base;
 922   iv->base_object = determine_base_object (base);
 923   iv->step = step;
 924   iv->biv_p = false;
 925   iv->have_use_for = false;
 926   iv->use_id = 0;
 927   iv->ssa_name = NULL_TREE;
 928
 929   return iv;
 930 }
 931
 932 /* Sets STEP and BASE for induction variable IV.  */
 933
 934 static void
 935 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 936 {
 937   struct version_info *info = name_info (data, iv);
 938
 939   gcc_assert (!info->iv);
 940
 941   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 942   info->iv = alloc_iv (base, step);
 943   info->iv->ssa_name = iv;
 944 }
 945
 946 /* Finds induction variable declaration for VAR.  */
 947
 948 static struct iv *
 949 get_iv (struct ivopts_data *data, tree var)
 950 {
 951   basic_block bb;
 952   tree type = TREE_TYPE (var);
 953
 954   if (!POINTER_TYPE_P (type)
 955       && !INTEGRAL_TYPE_P (type))
 956     return NULL;
 957
 958   if (!name_info (data, var)->iv)
 959     {
 960       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 961
 962       if (!bb
 963           || !flow_bb_inside_loop_p (data->current_loop, bb))
 964         set_iv (data, var, var, build_int_cst (type, 0));
 965     }
 966
 967   return name_info (data, var)->iv;
 968 }
 969
 970 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 971    not define a simple affine biv with nonzero step.  */
 972
 973 static tree
 974 determine_biv_step (gimple phi)
 975 {
 976   struct loop *loop = gimple_bb (phi)->loop_father;
 977   tree name = PHI_RESULT (phi);
 978   affine_iv iv;
 979
 980   if (virtual_operand_p (name))
 981     return NULL_TREE;
 982
 983   if (!simple_iv (loop, loop, name, &iv, true))
 984     return NULL_TREE;
 985
 986   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 987 }
 988
 989 /* Finds basic ivs.  */
 990
 991 static bool
 992 find_bivs (struct ivopts_data *data)
 993 {
 994   gimple phi;
 995   tree step, type, base;
 996   bool found = false;
 997   struct loop *loop = data->current_loop;
 998   gimple_stmt_iterator psi;
 999
1000   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1001     {
1002       phi = gsi_stmt (psi);
1003
1004       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1005         continue;
1006
1007       step = determine_biv_step (phi);
1008       if (!step)
1009         continue;
1010
1011       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1012       base = expand_simple_operations (base);
1013       if (contains_abnormal_ssa_name_p (base)
1014           || contains_abnormal_ssa_name_p (step))
1015         continue;
1016
1017       type = TREE_TYPE (PHI_RESULT (phi));
1018       base = fold_convert (type, base);
1019       if (step)
1020         {
1021           if (POINTER_TYPE_P (type))
1022             step = convert_to_ptrofftype (step);
1023           else
1024             step = fold_convert (type, step);
1025         }
1026
1027       set_iv (data, PHI_RESULT (phi), base, step);
1028       found = true;
1029     }
1030
1031   return found;
1032 }
1033
1034 /* Marks basic ivs.  */
1035
1036 static void
1037 mark_bivs (struct ivopts_data *data)
1038 {
1039   gimple phi;
1040   tree var;
1041   struct iv *iv, *incr_iv;
1042   struct loop *loop = data->current_loop;
1043   basic_block incr_bb;
1044   gimple_stmt_iterator psi;
1045
1046   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1047     {
1048       phi = gsi_stmt (psi);
1049
1050       iv = get_iv (data, PHI_RESULT (phi));
1051       if (!iv)
1052         continue;
1053
1054       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1055       incr_iv = get_iv (data, var);
1056       if (!incr_iv)
1057         continue;
1058
1059       /* If the increment is in the subloop, ignore it.  */
1060       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1061       if (incr_bb->loop_father != data->current_loop
1062           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1063         continue;
1064
1065       iv->biv_p = true;
1066       incr_iv->biv_p = true;
1067     }
1068 }
1069
1070 /* Checks whether STMT defines a linear induction variable and stores its
1071    parameters to IV.  */
1072
1073 static bool
1074 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1075 {
1076   tree lhs;
1077   struct loop *loop = data->current_loop;
1078
1079   iv->base = NULL_TREE;
1080   iv->step = NULL_TREE;
1081
1082   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1083     return false;
1084
1085   lhs = gimple_assign_lhs (stmt);
1086   if (TREE_CODE (lhs) != SSA_NAME)
1087     return false;
1088
1089   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1090     return false;
1091   iv->base = expand_simple_operations (iv->base);
1092
1093   if (contains_abnormal_ssa_name_p (iv->base)
1094       || contains_abnormal_ssa_name_p (iv->step))
1095     return false;
1096
1097   /* If STMT could throw, then do not consider STMT as defining a GIV.
1098      While this will suppress optimizations, we can not safely delete this
1099      GIV and associated statements, even if it appears it is not used.  */
1100   if (stmt_could_throw_p (stmt))
1101     return false;
1102
1103   return true;
1104 }
1105
1106 /* Finds general ivs in statement STMT.  */
1107
1108 static void
1109 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1110 {
1111   affine_iv iv;
1112
1113   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1114     return;
1115
1116   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1117 }
1118
1119 /* Finds general ivs in basic block BB.  */
1120
1121 static void
1122 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1123 {
1124   gimple_stmt_iterator bsi;
1125
1126   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1127     find_givs_in_stmt (data, gsi_stmt (bsi));
1128 }
1129
1130 /* Finds general ivs.  */
1131
1132 static void
1133 find_givs (struct ivopts_data *data)
1134 {
1135   struct loop *loop = data->current_loop;
1136   basic_block *body = get_loop_body_in_dom_order (loop);
1137   unsigned i;
1138
1139   for (i = 0; i < loop->num_nodes; i++)
1140     find_givs_in_bb (data, body[i]);
1141   free (body);
1142 }
1143
1144 /* For each ssa name defined in LOOP determines whether it is an induction
1145    variable and if so, its initial value and step.  */
1146
1147 static bool
1148 find_induction_variables (struct ivopts_data *data)
1149 {
1150   unsigned i;
1151   bitmap_iterator bi;
1152
1153   if (!find_bivs (data))
1154     return false;
1155
1156   find_givs (data);
1157   mark_bivs (data);
1158
1159   if (dump_file && (dump_flags & TDF_DETAILS))
1160     {
1161       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1162
1163       if (niter)
1164         {
1165           fprintf (dump_file, "  number of iterations ");
1166           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1167           if (!integer_zerop (niter->may_be_zero))
1168             {
1169               fprintf (dump_file, "; zero if ");
1170               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1171             }
1172           fprintf (dump_file, "\n\n");
1173         };
1174
1175       fprintf (dump_file, "Induction variables:\n\n");
1176
1177       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1178         {
1179           if (ver_info (data, i)->iv)
1180             dump_iv (dump_file, ver_info (data, i)->iv);
1181         }
1182     }
1183
1184   return true;
1185 }
1186
1187 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1188
1189 static struct iv_use *
1190 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1191             gimple stmt, enum use_type use_type)
1192 {
1193   struct iv_use *use = XCNEW (struct iv_use);
1194
1195   use->id = n_iv_uses (data);
1196   use->type = use_type;
1197   use->iv = iv;
1198   use->stmt = stmt;
1199   use->op_p = use_p;
1200   use->related_cands = BITMAP_ALLOC (NULL);
1201
1202   /* To avoid showing ssa name in the dumps, if it was not reset by the
1203      caller.  */
1204   iv->ssa_name = NULL_TREE;
1205
1206   if (dump_file && (dump_flags & TDF_DETAILS))
1207     dump_use (dump_file, use);
1208
1209   data->iv_uses.safe_push (use);
1210
1211   return use;
1212 }
1213
1214 /* Checks whether OP is a loop-level invariant and if so, records it.
1215    NONLINEAR_USE is true if the invariant is used in a way we do not
1216    handle specially.  */
1217
1218 static void
1219 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1220 {
1221   basic_block bb;
1222   struct version_info *info;
1223
1224   if (TREE_CODE (op) != SSA_NAME
1225       || virtual_operand_p (op))
1226     return;
1227
1228   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1229   if (bb
1230       && flow_bb_inside_loop_p (data->current_loop, bb))
1231     return;
1232
1233   info = name_info (data, op);
1234   info->name = op;
1235   info->has_nonlin_use |= nonlinear_use;
1236   if (!info->inv_id)
1237     info->inv_id = ++data->max_inv_id;
1238   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1239 }
1240
1241 /* Checks whether the use OP is interesting and if so, records it.  */
1242
1243 static struct iv_use *
1244 find_interesting_uses_op (struct ivopts_data *data, tree op)
1245 {
1246   struct iv *iv;
1247   struct iv *civ;
1248   gimple stmt;
1249   struct iv_use *use;
1250
1251   if (TREE_CODE (op) != SSA_NAME)
1252     return NULL;
1253
1254   iv = get_iv (data, op);
1255   if (!iv)
1256     return NULL;
1257
1258   if (iv->have_use_for)
1259     {
1260       use = iv_use (data, iv->use_id);
1261
1262       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1263       return use;
1264     }
1265
1266   if (integer_zerop (iv->step))
1267     {
1268       record_invariant (data, op, true);
1269       return NULL;
1270     }
1271   iv->have_use_for = true;
1272
1273   civ = XNEW (struct iv);
1274   *civ = *iv;
1275
1276   stmt = SSA_NAME_DEF_STMT (op);
1277   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1278               || is_gimple_assign (stmt));
1279
1280   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1281   iv->use_id = use->id;
1282
1283   return use;
1284 }
1285
1286 /* Given a condition in statement STMT, checks whether it is a compare
1287    of an induction variable and an invariant.  If this is the case,
1288    CONTROL_VAR is set to location of the iv, BOUND to the location of
1289    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1290    induction variable descriptions, and true is returned.  If this is not
1291    the case, CONTROL_VAR and BOUND are set to the arguments of the
1292    condition and false is returned.  */
1293
1294 static bool
1295 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1296                        tree **control_var, tree **bound,
1297                        struct iv **iv_var, struct iv **iv_bound)
1298 {
1299   /* The objects returned when COND has constant operands.  */
1300   static struct iv const_iv;
1301   static tree zero;
1302   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1303   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1304   bool ret = false;
1305
1306   if (gimple_code (stmt) == GIMPLE_COND)
1307     {
1308       op0 = gimple_cond_lhs_ptr (stmt);
1309       op1 = gimple_cond_rhs_ptr (stmt);
1310     }
1311   else
1312     {
1313       op0 = gimple_assign_rhs1_ptr (stmt);
1314       op1 = gimple_assign_rhs2_ptr (stmt);
1315     }
1316
1317   zero = integer_zero_node;
1318   const_iv.step = integer_zero_node;
1319
1320   if (TREE_CODE (*op0) == SSA_NAME)
1321     iv0 = get_iv (data, *op0);
1322   if (TREE_CODE (*op1) == SSA_NAME)
1323     iv1 = get_iv (data, *op1);
1324
1325   /* Exactly one of the compared values must be an iv, and the other one must
1326      be an invariant.  */
1327   if (!iv0 || !iv1)
1328     goto end;
1329
1330   if (integer_zerop (iv0->step))
1331     {
1332       /* Control variable may be on the other side.  */
1333       tmp_op = op0; op0 = op1; op1 = tmp_op;
1334       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1335     }
1336   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1337
1338 end:
1339   if (control_var)
1340     *control_var = op0;;
1341   if (iv_var)
1342     *iv_var = iv0;;
1343   if (bound)
1344     *bound = op1;
1345   if (iv_bound)
1346     *iv_bound = iv1;
1347
1348   return ret;
1349 }
1350
1351 /* Checks whether the condition in STMT is interesting and if so,
1352    records it.  */
1353
1354 static void
1355 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1356 {
1357   tree *var_p, *bound_p;
1358   struct iv *var_iv, *civ;
1359
1360   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1361     {
1362       find_interesting_uses_op (data, *var_p);
1363       find_interesting_uses_op (data, *bound_p);
1364       return;
1365     }
1366
1367   civ = XNEW (struct iv);
1368   *civ = *var_iv;
1369   record_use (data, NULL, civ, stmt, USE_COMPARE);
1370 }
1371
1372 /* Returns the outermost loop EXPR is obviously invariant in
1373    relative to the loop LOOP, i.e. if all its operands are defined
1374    outside of the returned loop.  Returns NULL if EXPR is not
1375    even obviously invariant in LOOP.  */
1376
1377 struct loop *
1378 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1379 {
1380   basic_block def_bb;
1381   unsigned i, len;
1382
1383   if (is_gimple_min_invariant (expr))
1384     return current_loops->tree_root;
1385
1386   if (TREE_CODE (expr) == SSA_NAME)
1387     {
1388       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1389       if (def_bb)
1390         {
1391           if (flow_bb_inside_loop_p (loop, def_bb))
1392             return NULL;
1393           return superloop_at_depth (loop,
1394                                      loop_depth (def_bb->loop_father) + 1);
1395         }
1396
1397       return current_loops->tree_root;
1398     }
1399
1400   if (!EXPR_P (expr))
1401     return NULL;
1402
1403   unsigned maxdepth = 0;
1404   len = TREE_OPERAND_LENGTH (expr);
1405   for (i = 0; i < len; i++)
1406     {
1407       struct loop *ivloop;
1408       if (!TREE_OPERAND (expr, i))
1409         continue;
1410
1411       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1412       if (!ivloop)
1413         return NULL;
1414       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1415     }
1416
1417   return superloop_at_depth (loop, maxdepth);
1418 }
1419
1420 /* Returns true if expression EXPR is obviously invariant in LOOP,
1421    i.e. if all its operands are defined outside of the LOOP.  LOOP
1422    should not be the function body.  */
1423
1424 bool
1425 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1426 {
1427   basic_block def_bb;
1428   unsigned i, len;
1429
1430   gcc_assert (loop_depth (loop) > 0);
1431
1432   if (is_gimple_min_invariant (expr))
1433     return true;
1434
1435   if (TREE_CODE (expr) == SSA_NAME)
1436     {
1437       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1438       if (def_bb
1439           && flow_bb_inside_loop_p (loop, def_bb))
1440         return false;
1441
1442       return true;
1443     }
1444
1445   if (!EXPR_P (expr))
1446     return false;
1447
1448   len = TREE_OPERAND_LENGTH (expr);
1449   for (i = 0; i < len; i++)
1450     if (TREE_OPERAND (expr, i)
1451         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1452       return false;
1453
1454   return true;
1455 }
1456
1457 /* Returns true if statement STMT is obviously invariant in LOOP,
1458    i.e. if all its operands on the RHS are defined outside of the LOOP.
1459    LOOP should not be the function body.  */
1460
1461 bool
1462 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1463 {
1464   unsigned i;
1465   tree lhs;
1466
1467   gcc_assert (loop_depth (loop) > 0);
1468
1469   lhs = gimple_get_lhs (stmt);
1470   for (i = 0; i < gimple_num_ops (stmt); i++)
1471     {
1472       tree op = gimple_op (stmt, i);
1473       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1474         return false;
1475     }
1476
1477   return true;
1478 }
1479
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

/* The data passed to idx_find_step through the DATA pointer of
   for_each_index.  */

struct ifs_ivopts_data
{
  /* The data of the pass; provides the current loop and the iv
     descriptions.  */
  struct ivopts_data *ivopts_data;

  /* The statement containing the reference; handed to
     convert_affine_scev.  */
  gimple stmt;

  /* The step cumulated so far, built in sizetype.  */
  tree step;
};
1490
1491 static bool
1492 idx_find_step (tree base, tree *idx, void *data)
1493 {
1494   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1495   struct iv *iv;
1496   tree step, iv_base, iv_step, lbound, off;
1497   struct loop *loop = dta->ivopts_data->current_loop;
1498
1499   /* If base is a component ref, require that the offset of the reference
1500      be invariant.  */
1501   if (TREE_CODE (base) == COMPONENT_REF)
1502     {
1503       off = component_ref_field_offset (base);
1504       return expr_invariant_in_loop_p (loop, off);
1505     }
1506
1507   /* If base is array, first check whether we will be able to move the
1508      reference out of the loop (in order to take its address in strength
1509      reduction).  In order for this to work we need both lower bound
1510      and step to be loop invariants.  */
1511   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1512     {
1513       /* Moreover, for a range, the size needs to be invariant as well.  */
1514       if (TREE_CODE (base) == ARRAY_RANGE_REF
1515           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1516         return false;
1517
1518       step = array_ref_element_size (base);
1519       lbound = array_ref_low_bound (base);
1520
1521       if (!expr_invariant_in_loop_p (loop, step)
1522           || !expr_invariant_in_loop_p (loop, lbound))
1523         return false;
1524     }
1525
1526   if (TREE_CODE (*idx) != SSA_NAME)
1527     return true;
1528
1529   iv = get_iv (dta->ivopts_data, *idx);
1530   if (!iv)
1531     return false;
1532
1533   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1534           *&x[0], which is not folded and does not trigger the
1535           ARRAY_REF path below.  */
1536   *idx = iv->base;
1537
1538   if (integer_zerop (iv->step))
1539     return true;
1540
1541   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1542     {
1543       step = array_ref_element_size (base);
1544
1545       /* We only handle addresses whose step is an integer constant.  */
1546       if (TREE_CODE (step) != INTEGER_CST)
1547         return false;
1548     }
1549   else
1550     /* The step for pointer arithmetics already is 1 byte.  */
1551     step = size_one_node;
1552
1553   iv_base = iv->base;
1554   iv_step = iv->step;
1555   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1556                             sizetype, &iv_base, &iv_step, dta->stmt,
1557                             false))
1558     {
1559       /* The index might wrap.  */
1560       return false;
1561     }
1562
1563   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1564   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1565
1566   return true;
1567 }
1568
1569 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1570    object is passed to it in DATA.  */
1571
1572 static bool
1573 idx_record_use (tree base, tree *idx,
1574                 void *vdata)
1575 {
1576   struct ivopts_data *data = (struct ivopts_data *) vdata;
1577   find_interesting_uses_op (data, *idx);
1578   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1579     {
1580       find_interesting_uses_op (data, array_ref_element_size (base));
1581       find_interesting_uses_op (data, array_ref_low_bound (base));
1582     }
1583   return true;
1584 }
1585
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.

   Only structurally obvious cases are recognized: TOP equal to BOT,
   TOP being a product with a constant second operand, a sum or
   difference of multiples of BOT, or both TOP and BOT being integer
   constants.  Note that *MUL may be written even when false is returned
   (integer constants that do not divide evenly).  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  /* The precision is taken from the type of TOP before conversions are
     stripped from it below.  */
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* TOP = X * MBY; only a constant MBY in the second operand is
         handled.  If X = res * BOT, then TOP = (res * MBY) * BOT.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Both operands must be constant multiples of BOT; the multiples
         are then added or subtracted.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      /* Avoid the division by zero.  */
      if (p1.is_zero ())
        return false;
      /* The quotient is stored to *MUL and the remainder to RES; TOP is a
         multiple of BOT only if the remainder is zero.  */
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
1648
1649 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1650
1651 static bool
1652 may_be_unaligned_p (tree ref, tree step)
1653 {
1654   tree base;
1655   tree base_type;
1656   HOST_WIDE_INT bitsize;
1657   HOST_WIDE_INT bitpos;
1658   tree toffset;
1659   enum machine_mode mode;
1660   int unsignedp, volatilep;
1661   unsigned base_align;
1662
1663   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1664      thus they are not misaligned.  */
1665   if (TREE_CODE (ref) == TARGET_MEM_REF)
1666     return false;
1667
1668   /* The test below is basically copy of what expr.c:normal_inner_ref
1669      does to check whether the object must be loaded by parts when
1670      STRICT_ALIGNMENT is true.  */
1671   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1672                               &unsignedp, &volatilep, true);
1673   base_type = TREE_TYPE (base);
1674   base_align = get_object_alignment (base);
1675   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1676
1677   if (mode != BLKmode)
1678     {
1679       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1680
1681       if (base_align < mode_align
1682           || (bitpos % mode_align) != 0
1683           || (bitpos % BITS_PER_UNIT) != 0)
1684         return true;
1685
1686       if (toffset
1687           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1688         return true;
1689
1690       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1691         return true;
1692     }
1693
1694   return false;
1695 }
1696
1697 /* Return true if EXPR may be non-addressable.   */
1698
1699 bool
1700 may_be_nonaddressable_p (tree expr)
1701 {
1702   switch (TREE_CODE (expr))
1703     {
1704     case TARGET_MEM_REF:
1705       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1706          target, thus they are always addressable.  */
1707       return false;
1708
1709     case COMPONENT_REF:
1710       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1711              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1712
1713     case VIEW_CONVERT_EXPR:
1714       /* This kind of view-conversions may wrap non-addressable objects
1715          and make them look addressable.  After some processing the
1716          non-addressability may be uncovered again, causing ADDR_EXPRs
1717          of inappropriate objects to be built.  */
1718       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1719           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1720         return true;
1721
1722       /* ... fall through ... */
1723
1724     case ARRAY_REF:
1725     case ARRAY_RANGE_REF:
1726       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1727
1728     CASE_CONVERT:
1729       return true;
1730
1731     default:
1732       break;
1733     }
1734
1735   return false;
1736 }
1737
/* Finds addresses in *OP_P inside STMT.  If the memory reference *OP_P
   has an address that is an affine function of induction variables with
   nonzero overall step, a USE_ADDRESS use with that base address and step
   is recorded.  Otherwise the individual indices of the reference are
   recorded as ordinary operand uses (see the FAIL label).  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  /* Work on a private copy; the operands of BASE are overwritten below
     while *OP_P must stay intact.  */
  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace each ssa name operand of the TARGET_MEM_REF by the initial
         value of its iv and collect the step.  */
      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      /* NOTE(review): the assignment to STEP below overwrites the step
         taken from TMR_BASE above instead of adding to it; this looks
         lossy if both operands are ivs with nonzero step -- confirm.  */
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          /* The step of the index is scaled by TMR_STEP, if present, and
             added to the step collected so far.  */
          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      /* An address that does not change in the loop is not an address
         use.  */
      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Let idx_find_step replace the indices by their initial values and
         cumulate the overall step.  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          /* Walk down to the innermost reference and try to fold it if it
             is a MEM_REF.  */
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* The reference is not handled as an address use; record the uses in
     its indices on the original, unmodified operand instead.  */
  for_each_index (op_p, idx_record_use, data);
}
1854
1855 /* Finds and records invariants used in STMT.  */
1856
1857 static void
1858 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1859 {
1860   ssa_op_iter iter;
1861   use_operand_p use_p;
1862   tree op;
1863
1864   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1865     {
1866       op = USE_FROM_PTR (use_p);
1867       record_invariant (data, op, false);
1868     }
1869 }
1870
/* Finds interesting uses of induction variables in the statement STMT.
   The invariants used in STMT are recorded first.  Conditions are then
   handled by find_interesting_uses_cond, memory references in simple
   assignments by find_interesting_uses_address, and the operands of all
   other statements by find_interesting_uses_op.  Statements that
   themselves define an induction variable with nonzero step are
   skipped.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      /* Plain copies, loads and stores: each side that is a memory
         reference is an address use, a value on the rhs is an operand
         use.  */
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      /* A comparison whose result is assigned is treated like a
         condition.  */
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  /* A phi node in the loop header that defines an iv with nonzero step
     has no interesting uses by itself either.  */
  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  /* Everything else: each ssa name operand that has an iv description is
     a potential operand use.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
1958
1959 /* Finds interesting uses of induction variables outside of loops
1960    on loop exit edge EXIT.  */
1961
1962 static void
1963 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1964 {
1965   gimple phi;
1966   gimple_stmt_iterator psi;
1967   tree def;
1968
1969   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1970     {
1971       phi = gsi_stmt (psi);
1972       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1973       if (!virtual_operand_p (def))
1974         find_interesting_uses_op (data, def);
1975     }
1976 }
1977
1978 /* Finds uses of the induction variables that are interesting.  */
1979
1980 static void
1981 find_interesting_uses (struct ivopts_data *data)
1982 {
1983   basic_block bb;
1984   gimple_stmt_iterator bsi;
1985   basic_block *body = get_loop_body (data->current_loop);
1986   unsigned i;
1987   struct version_info *info;
1988   edge e;
1989
1990   if (dump_file && (dump_flags & TDF_DETAILS))
1991     fprintf (dump_file, "Uses:\n\n");
1992
1993   for (i = 0; i < data->current_loop->num_nodes; i++)
1994     {
1995       edge_iterator ei;
1996       bb = body[i];
1997
1998       FOR_EACH_EDGE (e, ei, bb->succs)
1999         if (e->dest != EXIT_BLOCK_PTR
2000             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2001           find_interesting_uses_outside (data, e);
2002
2003       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2004         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2005       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2006         if (!is_gimple_debug (gsi_stmt (bsi)))
2007           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2008     }
2009
2010   if (dump_file && (dump_flags & TDF_DETAILS))
2011     {
2012       bitmap_iterator bi;
2013
2014       fprintf (dump_file, "\n");
2015
2016       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2017         {
2018           info = ver_info (data, i);
2019           if (info->inv_id)
2020             {
2021               fprintf (dump_file, "  ");
2022               print_generic_expr (dump_file, info->name, TDF_SLIM);
2023               fprintf (dump_file, " is invariant (%d)%s\n",
2024                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2025             }
2026         }
2027
2028       fprintf (dump_file, "\n");
2029     }
2030
2031   free (body);
2032 }
2033
2034 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2035    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2036    we are at the top-level of the processed address.  */
2037
2038 static tree
2039 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2040                 unsigned HOST_WIDE_INT *offset)
2041 {
2042   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2043   enum tree_code code;
2044   tree type, orig_type = TREE_TYPE (expr);
2045   unsigned HOST_WIDE_INT off0, off1, st;
2046   tree orig_expr = expr;
2047
2048   STRIP_NOPS (expr);
2049
2050   type = TREE_TYPE (expr);
2051   code = TREE_CODE (expr);
2052   *offset = 0;
2053
2054   switch (code)
2055     {
2056     case INTEGER_CST:
2057       if (!cst_and_fits_in_hwi (expr)
2058           || integer_zerop (expr))
2059         return orig_expr;
2060
2061       *offset = int_cst_value (expr);
2062       return build_int_cst (orig_type, 0);
2063
2064     case POINTER_PLUS_EXPR:
2065     case PLUS_EXPR:
2066     case MINUS_EXPR:
2067       op0 = TREE_OPERAND (expr, 0);
2068       op1 = TREE_OPERAND (expr, 1);
2069
2070       op0 = strip_offset_1 (op0, false, false, &off0);
2071       op1 = strip_offset_1 (op1, false, false, &off1);
2072
2073       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2074       if (op0 == TREE_OPERAND (expr, 0)
2075           && op1 == TREE_OPERAND (expr, 1))
2076         return orig_expr;
2077
2078       if (integer_zerop (op1))
2079         expr = op0;
2080       else if (integer_zerop (op0))
2081         {
2082           if (code == MINUS_EXPR)
2083             expr = fold_build1 (NEGATE_EXPR, type, op1);
2084           else
2085             expr = op1;
2086         }
2087       else
2088         expr = fold_build2 (code, type, op0, op1);
2089
2090       return fold_convert (orig_type, expr);
2091
2092     case MULT_EXPR:
2093       op1 = TREE_OPERAND (expr, 1);
2094       if (!cst_and_fits_in_hwi (op1))
2095         return orig_expr;
2096
2097       op0 = TREE_OPERAND (expr, 0);
2098       op0 = strip_offset_1 (op0, false, false, &off0);
2099       if (op0 == TREE_OPERAND (expr, 0))
2100         return orig_expr;
2101
2102       *offset = off0 * int_cst_value (op1);
2103       if (integer_zerop (op0))
2104         expr = op0;
2105       else
2106         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2107
2108       return fold_convert (orig_type, expr);
2109
2110     case ARRAY_REF:
2111     case ARRAY_RANGE_REF:
2112       if (!inside_addr)
2113         return orig_expr;
2114
2115       step = array_ref_element_size (expr);
2116       if (!cst_and_fits_in_hwi (step))
2117         break;
2118
2119       st = int_cst_value (step);
2120       op1 = TREE_OPERAND (expr, 1);
2121       op1 = strip_offset_1 (op1, false, false, &off1);
2122       *offset = off1 * st;
2123
2124       if (top_compref
2125           && integer_zerop (op1))
2126         {
2127           /* Strip the component reference completely.  */
2128           op0 = TREE_OPERAND (expr, 0);
2129           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2130           *offset += off0;
2131           return op0;
2132         }
2133       break;
2134
2135     case COMPONENT_REF:
2136       if (!inside_addr)
2137         return orig_expr;
2138
2139       tmp = component_ref_field_offset (expr);
2140       if (top_compref
2141           && cst_and_fits_in_hwi (tmp))
2142         {
2143           /* Strip the component reference completely.  */
2144           op0 = TREE_OPERAND (expr, 0);
2145           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2146           *offset = off0 + int_cst_value (tmp);
2147           return op0;
2148         }
2149       break;
2150
2151     case ADDR_EXPR:
2152       op0 = TREE_OPERAND (expr, 0);
2153       op0 = strip_offset_1 (op0, true, true, &off0);
2154       *offset += off0;
2155
2156       if (op0 == TREE_OPERAND (expr, 0))
2157         return orig_expr;
2158
2159       expr = build_fold_addr_expr (op0);
2160       return fold_convert (orig_type, expr);
2161
2162     case MEM_REF:
2163       /* ???  Offset operand?  */
2164       inside_addr = false;
2165       break;
2166
2167     default:
2168       return orig_expr;
2169     }
2170
2171   /* Default handling of expressions for that we want to recurse into
2172      the first operand.  */
2173   op0 = TREE_OPERAND (expr, 0);
2174   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2175   *offset += off0;
2176
2177   if (op0 == TREE_OPERAND (expr, 0)
2178       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2179     return orig_expr;
2180
2181   expr = copy_node (expr);
2182   TREE_OPERAND (expr, 0) = op0;
2183   if (op1)
2184     TREE_OPERAND (expr, 1) = op1;
2185
2186   /* Inside address, we might strip the top level component references,
2187      thus changing type of the expression.  Handling of ADDR_EXPR
2188      will fix that.  */
2189   expr = fold_convert (orig_type, expr);
2190
2191   return expr;
2192 }
2193
2194 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2195
2196 static tree
2197 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2198 {
2199   return strip_offset_1 (expr, false, false, offset);
2200 }
2201
2202 /* Returns variant of TYPE that can be used as base for different uses.
2203    We return unsigned type with the same precision, which avoids problems
2204    with overflows.  */
2205
2206 static tree
2207 generic_type_for (tree type)
2208 {
2209   if (POINTER_TYPE_P (type))
2210     return unsigned_type_for (type);
2211
2212   if (TYPE_UNSIGNED (type))
2213     return type;
2214
2215   return unsigned_type_for (type);
2216 }
2217
2218 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2219    the bitmap to that we should store it.  */
2220
2221 static struct ivopts_data *fd_ivopts_data;
2222 static tree
2223 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2224 {
2225   bitmap *depends_on = (bitmap *) data;
2226   struct version_info *info;
2227
2228   if (TREE_CODE (*expr_p) != SSA_NAME)
2229     return NULL_TREE;
2230   info = name_info (fd_ivopts_data, *expr_p);
2231
2232   if (!info->inv_id || info->has_nonlin_use)
2233     return NULL_TREE;
2234
2235   if (!*depends_on)
2236     *depends_on = BITMAP_ALLOC (NULL);
2237   bitmap_set_bit (*depends_on, info->inv_id);
2238
2239   return NULL_TREE;
2240 }
2241
2242 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2243    position to POS.  If USE is not NULL, the candidate is set as related to
2244    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2245    replacement of the final value of the iv by a direct computation.  */
2246
2247 static struct iv_cand *
2248 add_candidate_1 (struct ivopts_data *data,
2249                  tree base, tree step, bool important, enum iv_position pos,
2250                  struct iv_use *use, gimple incremented_at)
2251 {
2252   unsigned i;
2253   struct iv_cand *cand = NULL;
2254   tree type, orig_type;
2255
2256   /* For non-original variables, make sure their values are computed in a type
2257      that does not invoke undefined behavior on overflows (since in general,
2258      we cannot prove that these induction variables are non-wrapping).  */
2259   if (pos != IP_ORIGINAL)
2260     {
2261       orig_type = TREE_TYPE (base);
2262       type = generic_type_for (orig_type);
2263       if (type != orig_type)
2264         {
2265           base = fold_convert (type, base);
2266           step = fold_convert (type, step);
2267         }
2268     }
2269
2270   for (i = 0; i < n_iv_cands (data); i++)
2271     {
2272       cand = iv_cand (data, i);
2273
2274       if (cand->pos != pos)
2275         continue;
2276
2277       if (cand->incremented_at != incremented_at
2278           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2279               && cand->ainc_use != use))
2280         continue;
2281
2282       if (!cand->iv)
2283         {
2284           if (!base && !step)
2285             break;
2286
2287           continue;
2288         }
2289
2290       if (!base && !step)
2291         continue;
2292
2293       if (operand_equal_p (base, cand->iv->base, 0)
2294           && operand_equal_p (step, cand->iv->step, 0)
2295           && (TYPE_PRECISION (TREE_TYPE (base))
2296               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2297         break;
2298     }
2299
2300   if (i == n_iv_cands (data))
2301     {
2302       cand = XCNEW (struct iv_cand);
2303       cand->id = i;
2304
2305       if (!base && !step)
2306         cand->iv = NULL;
2307       else
2308         cand->iv = alloc_iv (base, step);
2309
2310       cand->pos = pos;
2311       if (pos != IP_ORIGINAL && cand->iv)
2312         {
2313           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2314           cand->var_after = cand->var_before;
2315         }
2316       cand->important = important;
2317       cand->incremented_at = incremented_at;
2318       data->iv_candidates.safe_push (cand);
2319
2320       if (step
2321           && TREE_CODE (step) != INTEGER_CST)
2322         {
2323           fd_ivopts_data = data;
2324           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2325         }
2326
2327       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2328         cand->ainc_use = use;
2329       else
2330         cand->ainc_use = NULL;
2331
2332       if (dump_file && (dump_flags & TDF_DETAILS))
2333         dump_cand (dump_file, cand);
2334     }
2335
2336   if (important && !cand->important)
2337     {
2338       cand->important = true;
2339       if (dump_file && (dump_flags & TDF_DETAILS))
2340         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2341     }
2342
2343   if (use)
2344     {
2345       bitmap_set_bit (use->related_cands, i);
2346       if (dump_file && (dump_flags & TDF_DETAILS))
2347         fprintf (dump_file, "Candidate %d is related to use %d\n",
2348                  cand->id, use->id);
2349     }
2350
2351   return cand;
2352 }
2353
2354 /* Returns true if incrementing the induction variable at the end of the LOOP
2355    is allowed.
2356
2357    The purpose is to avoid splitting latch edge with a biv increment, thus
2358    creating a jump, possibly confusing other optimization passes and leaving
2359    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2360    is not available (so we do not have a better alternative), or if the latch
2361    edge is already nonempty.  */
2362
2363 static bool
2364 allow_ip_end_pos_p (struct loop *loop)
2365 {
2366   if (!ip_normal_pos (loop))
2367     return true;
2368
2369   if (!empty_block_p (ip_end_pos (loop)))
2370     return true;
2371
2372   return false;
2373 }
2374
2375 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2376    Important field is set to IMPORTANT.  */
2377
2378 static void
2379 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2380                         bool important, struct iv_use *use)
2381 {
2382   basic_block use_bb = gimple_bb (use->stmt);
2383   enum machine_mode mem_mode;
2384   unsigned HOST_WIDE_INT cstepi;
2385
2386   /* If we insert the increment in any position other than the standard
2387      ones, we must ensure that it is incremented once per iteration.
2388      It must not be in an inner nested loop, or one side of an if
2389      statement.  */
2390   if (use_bb->loop_father != data->current_loop
2391       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2392       || stmt_could_throw_p (use->stmt)
2393       || !cst_and_fits_in_hwi (step))
2394     return;
2395
2396   cstepi = int_cst_value (step);
2397
2398   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2399   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2400         || USE_STORE_PRE_INCREMENT (mem_mode))
2401        && GET_MODE_SIZE (mem_mode) == cstepi)
2402       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2403            || USE_STORE_PRE_DECREMENT (mem_mode))
2404           && GET_MODE_SIZE (mem_mode) == -cstepi))
2405     {
2406       enum tree_code code = MINUS_EXPR;
2407       tree new_base;
2408       tree new_step = step;
2409
2410       if (POINTER_TYPE_P (TREE_TYPE (base)))
2411         {
2412           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2413           code = POINTER_PLUS_EXPR;
2414         }
2415       else
2416         new_step = fold_convert (TREE_TYPE (base), new_step);
2417       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2418       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2419                        use->stmt);
2420     }
2421   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2422         || USE_STORE_POST_INCREMENT (mem_mode))
2423        && GET_MODE_SIZE (mem_mode) == cstepi)
2424       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2425            || USE_STORE_POST_DECREMENT (mem_mode))
2426           && GET_MODE_SIZE (mem_mode) == -cstepi))
2427     {
2428       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2429                        use->stmt);
2430     }
2431 }
2432
2433 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2434    position to POS.  If USE is not NULL, the candidate is set as related to
2435    it.  The candidate computation is scheduled on all available positions.  */
2436
2437 static void
2438 add_candidate (struct ivopts_data *data,
2439                tree base, tree step, bool important, struct iv_use *use)
2440 {
2441   if (ip_normal_pos (data->current_loop))
2442     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2443   if (ip_end_pos (data->current_loop)
2444       && allow_ip_end_pos_p (data->current_loop))
2445     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2446
2447   if (use != NULL && use->type == USE_ADDRESS)
2448     add_autoinc_candidates (data, base, step, important, use);
2449 }
2450
2451 /* Adds standard iv candidates.  */
2452
2453 static void
2454 add_standard_iv_candidates (struct ivopts_data *data)
2455 {
2456   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2457
2458   /* The same for a double-integer type if it is still fast enough.  */
2459   if (TYPE_PRECISION
2460         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2461       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2462     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2463                    build_int_cst (long_integer_type_node, 1), true, NULL);
2464
2465   /* The same for a double-integer type if it is still fast enough.  */
2466   if (TYPE_PRECISION
2467         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2468       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2469     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2470                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2471 }
2472
2473
2474 /* Adds candidates bases on the old induction variable IV.  */
2475
2476 static void
2477 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2478 {
2479   gimple phi;
2480   tree def;
2481   struct iv_cand *cand;
2482
2483   add_candidate (data, iv->base, iv->step, true, NULL);
2484
2485   /* The same, but with initial value zero.  */
2486   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2487     add_candidate (data, size_int (0), iv->step, true, NULL);
2488   else
2489     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2490                    iv->step, true, NULL);
2491
2492   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2493   if (gimple_code (phi) == GIMPLE_PHI)
2494     {
2495       /* Additionally record the possibility of leaving the original iv
2496          untouched.  */
2497       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2498       cand = add_candidate_1 (data,
2499                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2500                               SSA_NAME_DEF_STMT (def));
2501       cand->var_before = iv->ssa_name;
2502       cand->var_after = def;
2503     }
2504 }
2505
2506 /* Adds candidates based on the old induction variables.  */
2507
2508 static void
2509 add_old_ivs_candidates (struct ivopts_data *data)
2510 {
2511   unsigned i;
2512   struct iv *iv;
2513   bitmap_iterator bi;
2514
2515   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2516     {
2517       iv = ver_info (data, i)->iv;
2518       if (iv && iv->biv_p && !integer_zerop (iv->step))
2519         add_old_iv_candidates (data, iv);
2520     }
2521 }
2522
2523 /* Adds candidates based on the value of the induction variable IV and USE.  */
2524
2525 static void
2526 add_iv_value_candidates (struct ivopts_data *data,
2527                          struct iv *iv, struct iv_use *use)
2528 {
2529   unsigned HOST_WIDE_INT offset;
2530   tree base;
2531   tree basetype;
2532
2533   add_candidate (data, iv->base, iv->step, false, use);
2534
2535   /* The same, but with initial value zero.  Make such variable important,
2536      since it is generic enough so that possibly many uses may be based
2537      on it.  */
2538   basetype = TREE_TYPE (iv->base);
2539   if (POINTER_TYPE_P (basetype))
2540     basetype = sizetype;
2541   add_candidate (data, build_int_cst (basetype, 0),
2542                  iv->step, true, use);
2543
2544   /* Third, try removing the constant offset.  Make sure to even
2545      add a candidate for &a[0] vs. (T *)&a.  */
2546   base = strip_offset (iv->base, &offset);
2547   if (offset
2548       || base != iv->base)
2549     add_candidate (data, base, iv->step, false, use);
2550 }
2551
2552 /* Adds candidates based on the uses.  */
2553
2554 static void
2555 add_derived_ivs_candidates (struct ivopts_data *data)
2556 {
2557   unsigned i;
2558
2559   for (i = 0; i < n_iv_uses (data); i++)
2560     {
2561       struct iv_use *use = iv_use (data, i);
2562
2563       if (!use)
2564         continue;
2565
2566       switch (use->type)
2567         {
2568         case USE_NONLINEAR_EXPR:
2569         case USE_COMPARE:
2570         case USE_ADDRESS:
2571           /* Just add the ivs based on the value of the iv used here.  */
2572           add_iv_value_candidates (data, use->iv, use);
2573           break;
2574
2575         default:
2576           gcc_unreachable ();
2577         }
2578     }
2579 }
2580
2581 /* Record important candidates and add them to related_cands bitmaps
2582    if needed.  */
2583
2584 static void
2585 record_important_candidates (struct ivopts_data *data)
2586 {
2587   unsigned i;
2588   struct iv_use *use;
2589
2590   for (i = 0; i < n_iv_cands (data); i++)
2591     {
2592       struct iv_cand *cand = iv_cand (data, i);
2593
2594       if (cand->important)
2595         bitmap_set_bit (data->important_candidates, i);
2596     }
2597
2598   data->consider_all_candidates = (n_iv_cands (data)
2599                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2600
2601   if (data->consider_all_candidates)
2602     {
2603       /* We will not need "related_cands" bitmaps in this case,
2604          so release them to decrease peak memory consumption.  */
2605       for (i = 0; i < n_iv_uses (data); i++)
2606         {
2607           use = iv_use (data, i);
2608           BITMAP_FREE (use->related_cands);
2609         }
2610     }
2611   else
2612     {
2613       /* Add important candidates to the related_cands bitmaps.  */
2614       for (i = 0; i < n_iv_uses (data); i++)
2615         bitmap_ior_into (iv_use (data, i)->related_cands,
2616                          data->important_candidates);
2617     }
2618 }
2619
2620 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2621    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2622    we allocate a simple list to every use.  */
2623
2624 static void
2625 alloc_use_cost_map (struct ivopts_data *data)
2626 {
2627   unsigned i, size, s;
2628
2629   for (i = 0; i < n_iv_uses (data); i++)
2630     {
2631       struct iv_use *use = iv_use (data, i);
2632
2633       if (data->consider_all_candidates)
2634         size = n_iv_cands (data);
2635       else
2636         {
2637           s = bitmap_count_bits (use->related_cands);
2638
2639           /* Round up to the power of two, so that moduling by it is fast.  */
2640           size = s ? (1 << ceil_log2 (s)) : 1;
2641         }
2642
2643       use->n_map_members = size;
2644       use->cost_map = XCNEWVEC (struct cost_pair, size);
2645     }
2646 }
2647
2648 /* Returns description of computation cost of expression whose runtime
2649    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2650
2651 static comp_cost
2652 new_cost (unsigned runtime, unsigned complexity)
2653 {
2654   comp_cost cost;
2655
2656   cost.cost = runtime;
2657   cost.complexity = complexity;
2658
2659   return cost;
2660 }
2661
2662 /* Adds costs COST1 and COST2.  */
2663
2664 static comp_cost
2665 add_costs (comp_cost cost1, comp_cost cost2)
2666 {
2667   cost1.cost += cost2.cost;
2668   cost1.complexity += cost2.complexity;
2669
2670   return cost1;
2671 }
2672 /* Subtracts costs COST1 and COST2.  */
2673
2674 static comp_cost
2675 sub_costs (comp_cost cost1, comp_cost cost2)
2676 {
2677   cost1.cost -= cost2.cost;
2678   cost1.complexity -= cost2.complexity;
2679
2680   return cost1;
2681 }
2682
2683 /* Returns a negative number if COST1 < COST2, a positive number if
2684    COST1 > COST2, and 0 if COST1 = COST2.  */
2685
2686 static int
2687 compare_costs (comp_cost cost1, comp_cost cost2)
2688 {
2689   if (cost1.cost == cost2.cost)
2690     return cost1.complexity - cost2.complexity;
2691
2692   return cost1.cost - cost2.cost;
2693 }
2694
2695 /* Returns true if COST is infinite.  */
2696
2697 static bool
2698 infinite_cost_p (comp_cost cost)
2699 {
2700   return cost.cost == INFTY;
2701 }
2702
2703 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2704    on invariants DEPENDS_ON and that the value used in expressing it
2705    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2706
2707 static void
2708 set_use_iv_cost (struct ivopts_data *data,
2709                  struct iv_use *use, struct iv_cand *cand,
2710                  comp_cost cost, bitmap depends_on, tree value,
2711                  enum tree_code comp, int inv_expr_id)
2712 {
2713   unsigned i, s;
2714
2715   if (infinite_cost_p (cost))
2716     {
2717       BITMAP_FREE (depends_on);
2718       return;
2719     }
2720
2721   if (data->consider_all_candidates)
2722     {
2723       use->cost_map[cand->id].cand = cand;
2724       use->cost_map[cand->id].cost = cost;
2725       use->cost_map[cand->id].depends_on = depends_on;
2726       use->cost_map[cand->id].value = value;
2727       use->cost_map[cand->id].comp = comp;
2728       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2729       return;
2730     }
2731
2732   /* n_map_members is a power of two, so this computes modulo.  */
2733   s = cand->id & (use->n_map_members - 1);
2734   for (i = s; i < use->n_map_members; i++)
2735     if (!use->cost_map[i].cand)
2736       goto found;
2737   for (i = 0; i < s; i++)
2738     if (!use->cost_map[i].cand)
2739       goto found;
2740
2741   gcc_unreachable ();
2742
2743 found:
2744   use->cost_map[i].cand = cand;
2745   use->cost_map[i].cost = cost;
2746   use->cost_map[i].depends_on = depends_on;
2747   use->cost_map[i].value = value;
2748   use->cost_map[i].comp = comp;
2749   use->cost_map[i].inv_expr_id = inv_expr_id;
2750 }
2751
2752 /* Gets cost of (USE, CANDIDATE) pair.  */
2753
2754 static struct cost_pair *
2755 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2756                  struct iv_cand *cand)
2757 {
2758   unsigned i, s;
2759   struct cost_pair *ret;
2760
2761   if (!cand)
2762     return NULL;
2763
2764   if (data->consider_all_candidates)
2765     {
2766       ret = use->cost_map + cand->id;
2767       if (!ret->cand)
2768         return NULL;
2769
2770       return ret;
2771     }
2772
2773   /* n_map_members is a power of two, so this computes modulo.  */
2774   s = cand->id & (use->n_map_members - 1);
2775   for (i = s; i < use->n_map_members; i++)
2776     if (use->cost_map[i].cand == cand)
2777       return use->cost_map + i;
2778     else if (use->cost_map[i].cand == NULL)
2779       return NULL;
2780   for (i = 0; i < s; i++)
2781     if (use->cost_map[i].cand == cand)
2782       return use->cost_map + i;
2783     else if (use->cost_map[i].cand == NULL)
2784       return NULL;
2785
2786   return NULL;
2787 }
2788
2789 /* Returns estimate on cost of computing SEQ.  */
2790
2791 static unsigned
2792 seq_cost (rtx seq, bool speed)
2793 {
2794   unsigned cost = 0;
2795   rtx set;
2796
2797   for (; seq; seq = NEXT_INSN (seq))
2798     {
2799       set = single_set (seq);
2800       if (set)
2801         cost += set_src_cost (SET_SRC (set), speed);
2802       else
2803         cost++;
2804     }
2805
2806   return cost;
2807 }
2808
2809 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2810 static rtx
2811 produce_memory_decl_rtl (tree obj, int *regno)
2812 {
2813   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2814   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2815   rtx x;
2816
2817   gcc_assert (obj);
2818   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2819     {
2820       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2821       x = gen_rtx_SYMBOL_REF (address_mode, name);
2822       SET_SYMBOL_REF_DECL (x, obj);
2823       x = gen_rtx_MEM (DECL_MODE (obj), x);
2824       set_mem_addr_space (x, as);
2825       targetm.encode_section_info (obj, x, true);
2826     }
2827   else
2828     {
2829       x = gen_raw_REG (address_mode, (*regno)++);
2830       x = gen_rtx_MEM (DECL_MODE (obj), x);
2831       set_mem_addr_space (x, as);
2832     }
2833
2834   return x;
2835 }
2836
2837 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2838    walk_tree.  DATA contains the actual fake register number.  */
2839
2840 static tree
2841 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2842 {
2843   tree obj = NULL_TREE;
2844   rtx x = NULL_RTX;
2845   int *regno = (int *) data;
2846
2847   switch (TREE_CODE (*expr_p))
2848     {
2849     case ADDR_EXPR:
2850       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2851            handled_component_p (*expr_p);
2852            expr_p = &TREE_OPERAND (*expr_p, 0))
2853         continue;
2854       obj = *expr_p;
2855       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2856         x = produce_memory_decl_rtl (obj, regno);
2857       break;
2858
2859     case SSA_NAME:
2860       *ws = 0;
2861       obj = SSA_NAME_VAR (*expr_p);
2862       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2863       if (!obj)
2864         return NULL_TREE;
2865       if (!DECL_RTL_SET_P (obj))
2866         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2867       break;
2868
2869     case VAR_DECL:
2870     case PARM_DECL:
2871     case RESULT_DECL:
2872       *ws = 0;
2873       obj = *expr_p;
2874
2875       if (DECL_RTL_SET_P (obj))
2876         break;
2877
2878       if (DECL_MODE (obj) == BLKmode)
2879         x = produce_memory_decl_rtl (obj, regno);
2880       else
2881         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2882
2883       break;
2884
2885     default:
2886       break;
2887     }
2888
2889   if (x)
2890     {
2891       decl_rtl_to_reset.safe_push (obj);
2892       SET_DECL_RTL (obj, x);
2893     }
2894
2895   return NULL_TREE;
2896 }
2897
2898 /* Determines cost of the computation of EXPR.  */
2899
2900 static unsigned
2901 computation_cost (tree expr, bool speed)
2902 {
2903   rtx seq, rslt;
2904   tree type = TREE_TYPE (expr);
2905   unsigned cost;
2906   /* Avoid using hard regs in ways which may be unsupported.  */
2907   int regno = LAST_VIRTUAL_REGISTER + 1;
2908   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2909   enum node_frequency real_frequency = node->frequency;
2910
2911   node->frequency = NODE_FREQUENCY_NORMAL;
2912   crtl->maybe_hot_insn_p = speed;
2913   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2914   start_sequence ();
2915   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2916   seq = get_insns ();
2917   end_sequence ();
2918   default_rtl_profile ();
2919   node->frequency = real_frequency;
2920
2921   cost = seq_cost (seq, speed);
2922   if (MEM_P (rslt))
2923     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2924                           TYPE_ADDR_SPACE (type), speed);
2925   else if (!REG_P (rslt))
2926     cost += set_src_cost (rslt, speed);
2927
2928   return cost;
2929 }
2930
2931 /* Returns variable containing the value of candidate CAND at statement AT.  */
2932
2933 static tree
2934 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2935 {
2936   if (stmt_after_increment (loop, cand, stmt))
2937     return cand->var_after;
2938   else
2939     return cand->var_before;
2940 }
2941
2942 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2943    same precision that is at least as wide as the precision of TYPE, stores
2944    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2945    type of A and B.  */
2946
2947 static tree
2948 determine_common_wider_type (tree *a, tree *b)
2949 {
2950   tree wider_type = NULL;
2951   tree suba, subb;
2952   tree atype = TREE_TYPE (*a);
2953
2954   if (CONVERT_EXPR_P (*a))
2955     {
2956       suba = TREE_OPERAND (*a, 0);
2957       wider_type = TREE_TYPE (suba);
2958       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2959         return atype;
2960     }
2961   else
2962     return atype;
2963
2964   if (CONVERT_EXPR_P (*b))
2965     {
2966       subb = TREE_OPERAND (*b, 0);
2967       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2968         return atype;
2969     }
2970   else
2971     return atype;
2972
2973   *a = suba;
2974   *b = subb;
2975   return wider_type;
2976 }
2977
2978 /* Determines the expression by that USE is expressed from induction variable
2979    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2980    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2981
2982 static bool
2983 get_computation_aff (struct loop *loop,
2984                      struct iv_use *use, struct iv_cand *cand, gimple at,
2985                      struct affine_tree_combination *aff)
2986 {
2987   tree ubase = use->iv->base;
2988   tree ustep = use->iv->step;
2989   tree cbase = cand->iv->base;
2990   tree cstep = cand->iv->step, cstep_common;
2991   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2992   tree common_type, var;
2993   tree uutype;
2994   aff_tree cbase_aff, var_aff;
2995   double_int rat;
2996
2997   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2998     {
2999       /* We do not have a precision to express the values of use.  */
3000       return false;
3001     }
3002
3003   var = var_at_stmt (loop, cand, at);
3004   uutype = unsigned_type_for (utype);
3005
3006   /* If the conversion is not noop, perform it.  */
3007   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3008     {
3009       cstep = fold_convert (uutype, cstep);
3010       cbase = fold_convert (uutype, cbase);
3011       var = fold_convert (uutype, var);
3012     }
3013
3014   if (!constant_multiple_of (ustep, cstep, &rat))
3015     return false;
3016
3017   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3018      type, we achieve better folding by computing their difference in this
3019      wider type, and cast the result to UUTYPE.  We do not need to worry about
3020      overflows, as all the arithmetics will in the end be performed in UUTYPE
3021      anyway.  */
3022   common_type = determine_common_wider_type (&ubase, &cbase);
3023
3024   /* use = ubase - ratio * cbase + ratio * var.  */
3025   tree_to_aff_combination (ubase, common_type, aff);
3026   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3027   tree_to_aff_combination (var, uutype, &var_aff);
3028
3029   /* We need to shift the value if we are after the increment.  */
3030   if (stmt_after_increment (loop, cand, at))
3031     {
3032       aff_tree cstep_aff;
3033
3034       if (common_type != uutype)
3035         cstep_common = fold_convert (common_type, cstep);
3036       else
3037         cstep_common = cstep;
3038
3039       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3040       aff_combination_add (&cbase_aff, &cstep_aff);
3041     }
3042
3043   aff_combination_scale (&cbase_aff, -rat);
3044   aff_combination_add (aff, &cbase_aff);
3045   if (common_type != uutype)
3046     aff_combination_convert (aff, uutype);
3047
3048   aff_combination_scale (&var_aff, rat);
3049   aff_combination_add (aff, &var_aff);
3050
3051   return true;
3052 }
3053
3054 /* Return the type of USE.  */
3055
3056 static tree
3057 get_use_type (struct iv_use *use)
3058 {
3059   tree base_type = TREE_TYPE (use->iv->base);
3060   tree type;
3061
3062   if (use->type == USE_ADDRESS)
3063     {
3064       /* The base_type may be a void pointer.  Create a pointer type based on
3065          the mem_ref instead.  */
3066       type = build_pointer_type (TREE_TYPE (*use->op_p));
3067       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3068                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3069     }
3070   else
3071     type = base_type;
3072
3073   return type;
3074 }
3075
3076 /* Determines the expression by that USE is expressed from induction variable
3077    CAND at statement AT in LOOP.  The computation is unshared.  */
3078
3079 static tree
3080 get_computation_at (struct loop *loop,
3081                     struct iv_use *use, struct iv_cand *cand, gimple at)
3082 {
3083   aff_tree aff;
3084   tree type = get_use_type (use);
3085
3086   if (!get_computation_aff (loop, use, cand, at, &aff))
3087     return NULL_TREE;
3088   unshare_aff_combination (&aff);
3089   return fold_convert (type, aff_combination_to_tree (&aff));
3090 }
3091
3092 /* Determines the expression by that USE is expressed from induction variable
3093    CAND in LOOP.  The computation is unshared.  */
3094
3095 static tree
3096 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3097 {
3098   return get_computation_at (loop, use, cand, use->stmt);
3099 }
3100
3101 /* Adjust the cost COST for being in loop setup rather than loop body.
3102    If we're optimizing for space, the loop setup overhead is constant;
3103    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3104 static unsigned
3105 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3106 {
3107   if (cost == INFTY)
3108     return cost;
3109   else if (optimize_loop_for_speed_p (data->current_loop))
3110     return cost / avg_loop_niter (data->current_loop);
3111   else
3112     return cost;
3113 }
3114
3115 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3116    validity for a memory reference accessing memory of mode MODE in
3117    address space AS.  */
3118
3119
3120 bool
3121 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3122                                  addr_space_t as)
3123 {
3124 #define MAX_RATIO 128
3125   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3126   static vec<sbitmap> valid_mult_list;
3127   sbitmap valid_mult;
3128
3129   if (data_index >= valid_mult_list.length ())
3130     valid_mult_list.safe_grow_cleared (data_index + 1);
3131
3132   valid_mult = valid_mult_list[data_index];
3133   if (!valid_mult)
3134     {
3135       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3136       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3137       rtx addr;
3138       HOST_WIDE_INT i;
3139
3140       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3141       bitmap_clear (valid_mult);
3142       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3143       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3144         {
3145           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3146           if (memory_address_addr_space_p (mode, addr, as))
3147             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3148         }
3149
3150       if (dump_file && (dump_flags & TDF_DETAILS))
3151         {
3152           fprintf (dump_file, "  allowed multipliers:");
3153           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3154             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3155               fprintf (dump_file, " %d", (int) i);
3156           fprintf (dump_file, "\n");
3157           fprintf (dump_file, "\n");
3158         }
3159
3160       valid_mult_list[data_index] = valid_mult;
3161     }
3162
3163   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3164     return false;
3165
3166   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3167 }
3168
3169 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3170    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3171    variable is omitted.  Compute the cost for a memory reference that accesses
3172    a memory location of mode MEM_MODE in address space AS.
3173
3174    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3175    size of MEM_MODE / RATIO) is available.  To make this determination, we
3176    look at the size of the increment to be made, which is given in CSTEP.
3177    CSTEP may be zero if the step is unknown.
3178    STMT_AFTER_INC is true iff the statement we're looking at is after the
3179    increment of the original biv.
3180
3181    TODO -- there must be some better way.  This all is quite crude.  */
3182
3183 typedef struct address_cost_data_s
3184 {
3185   HOST_WIDE_INT min_offset, max_offset;
3186   unsigned costs[2][2][2][2];
3187 } *address_cost_data;
3188
3189
3190 static comp_cost
3191 get_address_cost (bool symbol_present, bool var_present,
3192                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3193                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3194                   addr_space_t as, bool speed,
3195                   bool stmt_after_inc, bool *may_autoinc)
3196 {
3197   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3198   static vec<address_cost_data> address_cost_data_list;
3199   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3200   address_cost_data data;
3201   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3202   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3203   unsigned cost, acost, complexity;
3204   bool offset_p, ratio_p, autoinc;
3205   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3206   unsigned HOST_WIDE_INT mask;
3207   unsigned bits;
3208
3209   if (data_index >= address_cost_data_list.length ())
3210     address_cost_data_list.safe_grow_cleared (data_index + 1);
3211
3212   data = address_cost_data_list[data_index];
3213   if (!data)
3214     {
3215       HOST_WIDE_INT i;
3216       HOST_WIDE_INT rat, off = 0;
3217       int old_cse_not_expected, width;
3218       unsigned sym_p, var_p, off_p, rat_p, add_c;
3219       rtx seq, addr, base;
3220       rtx reg0, reg1;
3221
3222       data = (address_cost_data) xcalloc (1, sizeof (*data));
3223
3224       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3225
3226       width = GET_MODE_BITSIZE (address_mode) - 1;
3227       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3228         width = HOST_BITS_PER_WIDE_INT - 1;
3229       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3230
3231       for (i = width; i >= 0; i--)
3232         {
3233           off = -((unsigned HOST_WIDE_INT) 1 << i);
3234           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3235           if (memory_address_addr_space_p (mem_mode, addr, as))
3236             break;
3237         }
3238       data->min_offset = (i == -1? 0 : off);
3239
3240       for (i = width; i >= 0; i--)
3241         {
3242           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3243           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3244           if (memory_address_addr_space_p (mem_mode, addr, as))
3245             break;
3246         }
3247       if (i == -1)
3248         off = 0;
3249       data->max_offset = off;
3250
3251       if (dump_file && (dump_flags & TDF_DETAILS))
3252         {
3253           fprintf (dump_file, "get_address_cost:\n");
3254           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3255                    GET_MODE_NAME (mem_mode),
3256                    data->min_offset);
3257           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3258                    GET_MODE_NAME (mem_mode),
3259                    data->max_offset);
3260         }
3261
3262       rat = 1;
3263       for (i = 2; i <= MAX_RATIO; i++)
3264         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3265           {
3266             rat = i;
3267             break;
3268           }
3269
3270       /* Compute the cost of various addressing modes.  */
3271       acost = 0;
3272       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3273       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3274
3275       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3276           || USE_STORE_PRE_DECREMENT (mem_mode))
3277         {
3278           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3279           has_predec[mem_mode]
3280             = memory_address_addr_space_p (mem_mode, addr, as);
3281         }
3282       if (USE_LOAD_POST_DECREMENT (mem_mode)
3283           || USE_STORE_POST_DECREMENT (mem_mode))
3284         {
3285           addr = gen_rtx_POST_DEC (address_mode, reg0);
3286           has_postdec[mem_mode]
3287             = memory_address_addr_space_p (mem_mode, addr, as);
3288         }
3289       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3290           || USE_STORE_PRE_DECREMENT (mem_mode))
3291         {
3292           addr = gen_rtx_PRE_INC (address_mode, reg0);
3293           has_preinc[mem_mode]
3294             = memory_address_addr_space_p (mem_mode, addr, as);
3295         }
3296       if (USE_LOAD_POST_INCREMENT (mem_mode)
3297           || USE_STORE_POST_INCREMENT (mem_mode))
3298         {
3299           addr = gen_rtx_POST_INC (address_mode, reg0);
3300           has_postinc[mem_mode]
3301             = memory_address_addr_space_p (mem_mode, addr, as);
3302         }
3303       for (i = 0; i < 16; i++)
3304         {
3305           sym_p = i & 1;
3306           var_p = (i >> 1) & 1;
3307           off_p = (i >> 2) & 1;
3308           rat_p = (i >> 3) & 1;
3309
3310           addr = reg0;
3311           if (rat_p)
3312             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3313                                    gen_int_mode (rat, address_mode));
3314
3315           if (var_p)
3316             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3317
3318           if (sym_p)
3319             {
3320               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3321               /* ??? We can run into trouble with some backends by presenting
3322                  it with symbols which haven't been properly passed through
3323                  targetm.encode_section_info.  By setting the local bit, we
3324                  enhance the probability of things working.  */
3325               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3326
3327               if (off_p)
3328                 base = gen_rtx_fmt_e (CONST, address_mode,
3329                                       gen_rtx_fmt_ee
3330                                         (PLUS, address_mode, base,
3331                                          gen_int_mode (off, address_mode)));
3332             }
3333           else if (off_p)
3334             base = gen_int_mode (off, address_mode);
3335           else
3336             base = NULL_RTX;
3337
3338           if (base)
3339             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3340
3341           start_sequence ();
3342           /* To avoid splitting addressing modes, pretend that no cse will
3343              follow.  */
3344           old_cse_not_expected = cse_not_expected;
3345           cse_not_expected = true;
3346           addr = memory_address_addr_space (mem_mode, addr, as);
3347           cse_not_expected = old_cse_not_expected;
3348           seq = get_insns ();
3349           end_sequence ();
3350
3351           acost = seq_cost (seq, speed);
3352           acost += address_cost (addr, mem_mode, as, speed);
3353
3354           if (!acost)
3355             acost = 1;
3356           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3357         }
3358
3359       /* On some targets, it is quite expensive to load symbol to a register,
3360          which makes addresses that contain symbols look much more expensive.
3361          However, the symbol will have to be loaded in any case before the
3362          loop (and quite likely we have it in register already), so it does not
3363          make much sense to penalize them too heavily.  So make some final
3364          tweaks for the SYMBOL_PRESENT modes:
3365
3366          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3367          var is cheaper, use this mode with small penalty.
3368          If VAR_PRESENT is true, try whether the mode with
3369          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3370          if this is the case, use it.  */
3371       add_c = add_cost (speed, address_mode);
3372       for (i = 0; i < 8; i++)
3373         {
3374           var_p = i & 1;
3375           off_p = (i >> 1) & 1;
3376           rat_p = (i >> 2) & 1;
3377
3378           acost = data->costs[0][1][off_p][rat_p] + 1;
3379           if (var_p)
3380             acost += add_c;
3381
3382           if (acost < data->costs[1][var_p][off_p][rat_p])
3383             data->costs[1][var_p][off_p][rat_p] = acost;
3384         }
3385
3386       if (dump_file && (dump_flags & TDF_DETAILS))
3387         {
3388           fprintf (dump_file, "Address costs:\n");
3389
3390           for (i = 0; i < 16; i++)
3391             {
3392               sym_p = i & 1;
3393               var_p = (i >> 1) & 1;
3394               off_p = (i >> 2) & 1;
3395               rat_p = (i >> 3) & 1;
3396
3397               fprintf (dump_file, "  ");
3398               if (sym_p)
3399                 fprintf (dump_file, "sym + ");
3400               if (var_p)
3401                 fprintf (dump_file, "var + ");
3402               if (off_p)
3403                 fprintf (dump_file, "cst + ");
3404               if (rat_p)
3405                 fprintf (dump_file, "rat * ");
3406
3407               acost = data->costs[sym_p][var_p][off_p][rat_p];
3408               fprintf (dump_file, "index costs %d\n", acost);
3409             }
3410           if (has_predec[mem_mode] || has_postdec[mem_mode]
3411               || has_preinc[mem_mode] || has_postinc[mem_mode])
3412             fprintf (dump_file, "  May include autoinc/dec\n");
3413           fprintf (dump_file, "\n");
3414         }
3415
3416       address_cost_data_list[data_index] = data;
3417     }
3418
3419   bits = GET_MODE_BITSIZE (address_mode);
3420   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3421   offset &= mask;
3422   if ((offset >> (bits - 1) & 1))
3423     offset |= ~mask;
3424   s_offset = offset;
3425
3426   autoinc = false;
3427   msize = GET_MODE_SIZE (mem_mode);
3428   autoinc_offset = offset;
3429   if (stmt_after_inc)
3430     autoinc_offset += ratio * cstep;
3431   if (symbol_present || var_present || ratio != 1)
3432     autoinc = false;
3433   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3434                && msize == cstep)
3435            || (has_postdec[mem_mode] && autoinc_offset == 0
3436                && msize == -cstep)
3437            || (has_preinc[mem_mode] && autoinc_offset == msize
3438                && msize == cstep)
3439            || (has_predec[mem_mode] && autoinc_offset == -msize
3440                && msize == -cstep))
3441     autoinc = true;
3442
3443   cost = 0;
3444   offset_p = (s_offset != 0
3445               && data->min_offset <= s_offset
3446               && s_offset <= data->max_offset);
3447   ratio_p = (ratio != 1
3448              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3449
3450   if (ratio != 1 && !ratio_p)
3451     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3452
3453   if (s_offset && !offset_p && !symbol_present)
3454     cost += add_cost (speed, address_mode);
3455
3456   if (may_autoinc)
3457     *may_autoinc = autoinc;
3458   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3459   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3460   return new_cost (cost + acost, complexity);
3461 }
3462
 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
    the EXPR operand holding the shift.  COST0 and COST1 are the costs for
    calculating the operands of EXPR.  Returns true if successful, and returns
    the cost in COST.  */
3467
3468 static bool
3469 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3470                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3471 {
3472   comp_cost res;
3473   tree op1 = TREE_OPERAND (expr, 1);
3474   tree cst = TREE_OPERAND (mult, 1);
3475   tree multop = TREE_OPERAND (mult, 0);
3476   int m = exact_log2 (int_cst_value (cst));
3477   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3478   int sa_cost;
3479
3480   if (!(m >= 0 && m < maxm))
3481     return false;
3482
3483   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3484              ? shiftadd_cost (speed, mode, m)
3485              : (mult == op1
3486                 ? shiftsub1_cost (speed, mode, m)
3487                 : shiftsub0_cost (speed, mode, m)));
3488   res = new_cost (sa_cost, 0);
3489   res = add_costs (res, mult == op1 ? cost0 : cost1);
3490
3491   STRIP_NOPS (multop);
3492   if (!is_gimple_val (multop))
3493     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3494
3495   *cost = res;
3496   return true;
3497 }
3498
3499 /* Estimates cost of forcing expression EXPR into a variable.  */
3500
3501 static comp_cost
3502 force_expr_to_var_cost (tree expr, bool speed)
3503 {
3504   static bool costs_initialized = false;
3505   static unsigned integer_cost [2];
3506   static unsigned symbol_cost [2];
3507   static unsigned address_cost [2];
3508   tree op0, op1;
3509   comp_cost cost0, cost1, cost;
3510   enum machine_mode mode;
3511
3512   if (!costs_initialized)
3513     {
3514       tree type = build_pointer_type (integer_type_node);
3515       tree var, addr;
3516       rtx x;
3517       int i;
3518
3519       var = create_tmp_var_raw (integer_type_node, "test_var");
3520       TREE_STATIC (var) = 1;
3521       x = produce_memory_decl_rtl (var, NULL);
3522       SET_DECL_RTL (var, x);
3523
3524       addr = build1 (ADDR_EXPR, type, var);
3525
3526
3527       for (i = 0; i < 2; i++)
3528         {
3529           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3530                                                              2000), i);
3531
3532           symbol_cost[i] = computation_cost (addr, i) + 1;
3533
3534           address_cost[i]
3535             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3536           if (dump_file && (dump_flags & TDF_DETAILS))
3537             {
3538               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3539               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3540               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3541               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3542               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3543               fprintf (dump_file, "\n");
3544             }
3545         }
3546
3547       costs_initialized = true;
3548     }
3549
3550   STRIP_NOPS (expr);
3551
3552   if (SSA_VAR_P (expr))
3553     return no_cost;
3554
3555   if (is_gimple_min_invariant (expr))
3556     {
3557       if (TREE_CODE (expr) == INTEGER_CST)
3558         return new_cost (integer_cost [speed], 0);
3559
3560       if (TREE_CODE (expr) == ADDR_EXPR)
3561         {
3562           tree obj = TREE_OPERAND (expr, 0);
3563
3564           if (TREE_CODE (obj) == VAR_DECL
3565               || TREE_CODE (obj) == PARM_DECL
3566               || TREE_CODE (obj) == RESULT_DECL)
3567             return new_cost (symbol_cost [speed], 0);
3568         }
3569
3570       return new_cost (address_cost [speed], 0);
3571     }
3572
3573   switch (TREE_CODE (expr))
3574     {
3575     case POINTER_PLUS_EXPR:
3576     case PLUS_EXPR:
3577     case MINUS_EXPR:
3578     case MULT_EXPR:
3579       op0 = TREE_OPERAND (expr, 0);
3580       op1 = TREE_OPERAND (expr, 1);
3581       STRIP_NOPS (op0);
3582       STRIP_NOPS (op1);
3583
3584       if (is_gimple_val (op0))
3585         cost0 = no_cost;
3586       else
3587         cost0 = force_expr_to_var_cost (op0, speed);
3588
3589       if (is_gimple_val (op1))
3590         cost1 = no_cost;
3591       else
3592         cost1 = force_expr_to_var_cost (op1, speed);
3593
3594       break;
3595
3596     case NEGATE_EXPR:
3597       op0 = TREE_OPERAND (expr, 0);
3598       STRIP_NOPS (op0);
3599       op1 = NULL_TREE;
3600
3601       if (is_gimple_val (op0))
3602         cost0 = no_cost;
3603       else
3604         cost0 = force_expr_to_var_cost (op0, speed);
3605
3606       cost1 = no_cost;
3607       break;
3608
3609     default:
3610       /* Just an arbitrary value, FIXME.  */
3611       return new_cost (target_spill_cost[speed], 0);
3612     }
3613
3614   mode = TYPE_MODE (TREE_TYPE (expr));
3615   switch (TREE_CODE (expr))
3616     {
3617     case POINTER_PLUS_EXPR:
3618     case PLUS_EXPR:
3619     case MINUS_EXPR:
3620     case NEGATE_EXPR:
3621       cost = new_cost (add_cost (speed, mode), 0);
3622       if (TREE_CODE (expr) != NEGATE_EXPR)
3623         {
3624           tree mult = NULL_TREE;
3625           comp_cost sa_cost;
3626           if (TREE_CODE (op1) == MULT_EXPR)
3627             mult = op1;
3628           else if (TREE_CODE (op0) == MULT_EXPR)
3629             mult = op0;
3630
3631           if (mult != NULL_TREE
3632               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3633               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3634                                     speed, &sa_cost))
3635             return sa_cost;
3636         }
3637       break;
3638
3639     case MULT_EXPR:
3640       if (cst_and_fits_in_hwi (op0))
3641         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3642                                              mode, speed), 0);
3643       else if (cst_and_fits_in_hwi (op1))
3644         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3645                                              mode, speed), 0);
3646       else
3647         return new_cost (target_spill_cost [speed], 0);
3648       break;
3649
3650     default:
3651       gcc_unreachable ();
3652     }
3653
3654   cost = add_costs (cost, cost0);
3655   cost = add_costs (cost, cost1);
3656
3657   /* Bound the cost by target_spill_cost.  The parts of complicated
3658      computations often are either loop invariant or at least can
3659      be shared between several iv uses, so letting this grow without
3660      limits would not give reasonable results.  */
3661   if (cost.cost > (int) target_spill_cost [speed])
3662     cost.cost = target_spill_cost [speed];
3663
3664   return cost;
3665 }
3666
3667 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3668    invariants the computation depends on.  */
3669
3670 static comp_cost
3671 force_var_cost (struct ivopts_data *data,
3672                 tree expr, bitmap *depends_on)
3673 {
3674   if (depends_on)
3675     {
3676       fd_ivopts_data = data;
3677       walk_tree (&expr, find_depends, depends_on, NULL);
3678     }
3679
3680   return force_expr_to_var_cost (expr, data->speed);
3681 }
3682
3683 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3684    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3685    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3686    invariants the computation depends on.  */
3687
3688 static comp_cost
3689 split_address_cost (struct ivopts_data *data,
3690                     tree addr, bool *symbol_present, bool *var_present,
3691                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3692 {
3693   tree core;
3694   HOST_WIDE_INT bitsize;
3695   HOST_WIDE_INT bitpos;
3696   tree toffset;
3697   enum machine_mode mode;
3698   int unsignedp, volatilep;
3699
3700   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3701                               &unsignedp, &volatilep, false);
3702
3703   if (toffset != 0
3704       || bitpos % BITS_PER_UNIT != 0
3705       || TREE_CODE (core) != VAR_DECL)
3706     {
3707       *symbol_present = false;
3708       *var_present = true;
3709       fd_ivopts_data = data;
3710       walk_tree (&addr, find_depends, depends_on, NULL);
3711       return new_cost (target_spill_cost[data->speed], 0);
3712     }
3713
3714   *offset += bitpos / BITS_PER_UNIT;
3715   if (TREE_STATIC (core)
3716       || DECL_EXTERNAL (core))
3717     {
3718       *symbol_present = true;
3719       *var_present = false;
3720       return no_cost;
3721     }
3722
3723   *symbol_present = false;
3724   *var_present = true;
3725   return no_cost;
3726 }
3727
3728 /* Estimates cost of expressing difference of addresses E1 - E2 as
3729    var + symbol + offset.  The value of offset is added to OFFSET,
3730    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3731    part is missing.  DEPENDS_ON is a set of the invariants the computation
3732    depends on.  */
3733
3734 static comp_cost
3735 ptr_difference_cost (struct ivopts_data *data,
3736                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3737                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3738 {
3739   HOST_WIDE_INT diff = 0;
3740   aff_tree aff_e1, aff_e2;
3741   tree type;
3742
3743   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3744
3745   if (ptr_difference_const (e1, e2, &diff))
3746     {
3747       *offset += diff;
3748       *symbol_present = false;
3749       *var_present = false;
3750       return no_cost;
3751     }
3752
3753   if (integer_zerop (e2))
3754     return split_address_cost (data, TREE_OPERAND (e1, 0),
3755                                symbol_present, var_present, offset, depends_on);
3756
3757   *symbol_present = false;
3758   *var_present = true;
3759
3760   type = signed_type_for (TREE_TYPE (e1));
3761   tree_to_aff_combination (e1, type, &aff_e1);
3762   tree_to_aff_combination (e2, type, &aff_e2);
3763   aff_combination_scale (&aff_e2, double_int_minus_one);
3764   aff_combination_add (&aff_e1, &aff_e2);
3765
3766   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3767 }
3768
3769 /* Estimates cost of expressing difference E1 - E2 as
3770    var + symbol + offset.  The value of offset is added to OFFSET,
3771    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3772    part is missing.  DEPENDS_ON is a set of the invariants the computation
3773    depends on.  */
3774
3775 static comp_cost
3776 difference_cost (struct ivopts_data *data,
3777                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3778                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3779 {
3780   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3781   unsigned HOST_WIDE_INT off1, off2;
3782   aff_tree aff_e1, aff_e2;
3783   tree type;
3784
3785   e1 = strip_offset (e1, &off1);
3786   e2 = strip_offset (e2, &off2);
3787   *offset += off1 - off2;
3788
3789   STRIP_NOPS (e1);
3790   STRIP_NOPS (e2);
3791
3792   if (TREE_CODE (e1) == ADDR_EXPR)
3793     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3794                                 offset, depends_on);
3795   *symbol_present = false;
3796
3797   if (operand_equal_p (e1, e2, 0))
3798     {
3799       *var_present = false;
3800       return no_cost;
3801     }
3802
3803   *var_present = true;
3804
3805   if (integer_zerop (e2))
3806     return force_var_cost (data, e1, depends_on);
3807
3808   if (integer_zerop (e1))
3809     {
3810       comp_cost cost = force_var_cost (data, e2, depends_on);
3811       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3812       return cost;
3813     }
3814
3815   type = signed_type_for (TREE_TYPE (e1));
3816   tree_to_aff_combination (e1, type, &aff_e1);
3817   tree_to_aff_combination (e2, type, &aff_e2);
3818   aff_combination_scale (&aff_e2, double_int_minus_one);
3819   aff_combination_add (&aff_e1, &aff_e2);
3820
3821   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3822 }
3823
3824 /* Returns true if AFF1 and AFF2 are identical.  */
3825
3826 static bool
3827 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3828 {
3829   unsigned i;
3830
3831   if (aff1->n != aff2->n)
3832     return false;
3833
3834   for (i = 0; i < aff1->n; i++)
3835     {
3836       if (aff1->elts[i].coef != aff2->elts[i].coef)
3837         return false;
3838
3839       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3840         return false;
3841     }
3842   return true;
3843 }
3844
3845 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3846
3847 static int
3848 get_expr_id (struct ivopts_data *data, tree expr)
3849 {
3850   struct iv_inv_expr_ent ent;
3851   struct iv_inv_expr_ent **slot;
3852
3853   ent.expr = expr;
3854   ent.hash = iterative_hash_expr (expr, 0);
3855   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3856   if (*slot)
3857     return (*slot)->id;
3858
3859   *slot = XNEW (struct iv_inv_expr_ent);
3860   (*slot)->expr = expr;
3861   (*slot)->hash = ent.hash;
3862   (*slot)->id = data->inv_expr_id++;
3863   return (*slot)->id;
3864 }
3865
3866 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3867    requires a new compiler generated temporary.  Returns -1 otherwise.
3868    ADDRESS_P is a flag indicating if the expression is for address
3869    computation.  */
3870
3871 static int
3872 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3873                             tree cbase, HOST_WIDE_INT ratio,
3874                             bool address_p)
3875 {
3876   aff_tree ubase_aff, cbase_aff;
3877   tree expr, ub, cb;
3878
3879   STRIP_NOPS (ubase);
3880   STRIP_NOPS (cbase);
3881   ub = ubase;
3882   cb = cbase;
3883
3884   if ((TREE_CODE (ubase) == INTEGER_CST)
3885       && (TREE_CODE (cbase) == INTEGER_CST))
3886     return -1;
3887
3888   /* Strips the constant part. */
3889   if (TREE_CODE (ubase) == PLUS_EXPR
3890       || TREE_CODE (ubase) == MINUS_EXPR
3891       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3892     {
3893       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3894         ubase = TREE_OPERAND (ubase, 0);
3895     }
3896
3897   /* Strips the constant part. */
3898   if (TREE_CODE (cbase) == PLUS_EXPR
3899       || TREE_CODE (cbase) == MINUS_EXPR
3900       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3901     {
3902       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3903         cbase = TREE_OPERAND (cbase, 0);
3904     }
3905
3906   if (address_p)
3907     {
3908       if (((TREE_CODE (ubase) == SSA_NAME)
3909            || (TREE_CODE (ubase) == ADDR_EXPR
3910                && is_gimple_min_invariant (ubase)))
3911           && (TREE_CODE (cbase) == INTEGER_CST))
3912         return -1;
3913
3914       if (((TREE_CODE (cbase) == SSA_NAME)
3915            || (TREE_CODE (cbase) == ADDR_EXPR
3916                && is_gimple_min_invariant (cbase)))
3917           && (TREE_CODE (ubase) == INTEGER_CST))
3918         return -1;
3919     }
3920
3921   if (ratio == 1)
3922     {
3923       if(operand_equal_p (ubase, cbase, 0))
3924         return -1;
3925
3926       if (TREE_CODE (ubase) == ADDR_EXPR
3927           && TREE_CODE (cbase) == ADDR_EXPR)
3928         {
3929           tree usym, csym;
3930
3931           usym = TREE_OPERAND (ubase, 0);
3932           csym = TREE_OPERAND (cbase, 0);
3933           if (TREE_CODE (usym) == ARRAY_REF)
3934             {
3935               tree ind = TREE_OPERAND (usym, 1);
3936               if (TREE_CODE (ind) == INTEGER_CST
3937                   && host_integerp (ind, 0)
3938                   && TREE_INT_CST_LOW (ind) == 0)
3939                 usym = TREE_OPERAND (usym, 0);
3940             }
3941           if (TREE_CODE (csym) == ARRAY_REF)
3942             {
3943               tree ind = TREE_OPERAND (csym, 1);
3944               if (TREE_CODE (ind) == INTEGER_CST
3945                   && host_integerp (ind, 0)
3946                   && TREE_INT_CST_LOW (ind) == 0)
3947                 csym = TREE_OPERAND (csym, 0);
3948             }
3949           if (operand_equal_p (usym, csym, 0))
3950             return -1;
3951         }
3952       /* Now do more complex comparison  */
3953       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3954       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3955       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3956         return -1;
3957     }
3958
3959   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3960   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3961
3962   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3963   aff_combination_add (&ubase_aff, &cbase_aff);
3964   expr = aff_combination_to_tree (&ubase_aff);
3965   return get_expr_id (data, expr);
3966 }
3967
3968
3969
3970 /* Determines the cost of the computation by that USE is expressed
3971    from induction variable CAND.  If ADDRESS_P is true, we just need
3972    to create an address from it, otherwise we want to get it into
3973    register.  A set of invariants we depend on is stored in
3974    DEPENDS_ON.  AT is the statement at that the value is computed.
3975    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3976    addressing is likely.  */
3977
3978 static comp_cost
3979 get_computation_cost_at (struct ivopts_data *data,
3980                          struct iv_use *use, struct iv_cand *cand,
3981                          bool address_p, bitmap *depends_on, gimple at,
3982                          bool *can_autoinc,
3983                          int *inv_expr_id)
3984 {
3985   tree ubase = use->iv->base, ustep = use->iv->step;
3986   tree cbase, cstep;
3987   tree utype = TREE_TYPE (ubase), ctype;
3988   unsigned HOST_WIDE_INT cstepi, offset = 0;
3989   HOST_WIDE_INT ratio, aratio;
3990   bool var_present, symbol_present, stmt_is_after_inc;
3991   comp_cost cost;
3992   double_int rat;
3993   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3994   enum machine_mode mem_mode = (address_p
3995                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3996                                 : VOIDmode);
3997
3998   *depends_on = NULL;
3999
4000   /* Only consider real candidates.  */
4001   if (!cand->iv)
4002     return infinite_cost;
4003
4004   cbase = cand->iv->base;
4005   cstep = cand->iv->step;
4006   ctype = TREE_TYPE (cbase);
4007
4008   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4009     {
4010       /* We do not have a precision to express the values of use.  */
4011       return infinite_cost;
4012     }
4013
4014   if (address_p
4015       || (use->iv->base_object
4016           && cand->iv->base_object
4017           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4018           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4019     {
4020       /* Do not try to express address of an object with computation based
4021          on address of a different object.  This may cause problems in rtl
4022          level alias analysis (that does not expect this to be happening,
4023          as this is illegal in C), and would be unlikely to be useful
4024          anyway.  */
4025       if (use->iv->base_object
4026           && cand->iv->base_object
4027           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4028         return infinite_cost;
4029     }
4030
4031   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4032     {
4033       /* TODO -- add direct handling of this case.  */
4034       goto fallback;
4035     }
4036
4037   /* CSTEPI is removed from the offset in case statement is after the
4038      increment.  If the step is not constant, we use zero instead.
4039      This is a bit imprecise (there is the extra addition), but
4040      redundancy elimination is likely to transform the code so that
4041      it uses value of the variable before increment anyway,
4042      so it is not that much unrealistic.  */
4043   if (cst_and_fits_in_hwi (cstep))
4044     cstepi = int_cst_value (cstep);
4045   else
4046     cstepi = 0;
4047
4048   if (!constant_multiple_of (ustep, cstep, &rat))
4049     return infinite_cost;
4050
4051   if (rat.fits_shwi ())
4052     ratio = rat.to_shwi ();
4053   else
4054     return infinite_cost;
4055
4056   STRIP_NOPS (cbase);
4057   ctype = TREE_TYPE (cbase);
4058
4059   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4060
4061   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4062      or ratio == 1, it is better to handle this like
4063
4064      ubase - ratio * cbase + ratio * var
4065
4066      (also holds in the case ratio == -1, TODO.  */
4067
4068   if (cst_and_fits_in_hwi (cbase))
4069     {
4070       offset = - ratio * int_cst_value (cbase);
4071       cost = difference_cost (data,
4072                               ubase, build_int_cst (utype, 0),
4073                               &symbol_present, &var_present, &offset,
4074                               depends_on);
4075       cost.cost /= avg_loop_niter (data->current_loop);
4076     }
4077   else if (ratio == 1)
4078     {
4079       tree real_cbase = cbase;
4080
4081       /* Check to see if any adjustment is needed.  */
4082       if (cstepi == 0 && stmt_is_after_inc)
4083         {
4084           aff_tree real_cbase_aff;
4085           aff_tree cstep_aff;
4086
4087           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4088                                    &real_cbase_aff);
4089           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4090
4091           aff_combination_add (&real_cbase_aff, &cstep_aff);
4092           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4093         }
4094
4095       cost = difference_cost (data,
4096                               ubase, real_cbase,
4097                               &symbol_present, &var_present, &offset,
4098                               depends_on);
4099       cost.cost /= avg_loop_niter (data->current_loop);
4100     }
4101   else if (address_p
4102            && !POINTER_TYPE_P (ctype)
4103            && multiplier_allowed_in_address_p
4104                 (ratio, mem_mode,
4105                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4106     {
4107       cbase
4108         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4109       cost = difference_cost (data,
4110                               ubase, cbase,
4111                               &symbol_present, &var_present, &offset,
4112                               depends_on);
4113       cost.cost /= avg_loop_niter (data->current_loop);
4114     }
4115   else
4116     {
4117       cost = force_var_cost (data, cbase, depends_on);
4118       cost = add_costs (cost,
4119                         difference_cost (data,
4120                                          ubase, build_int_cst (utype, 0),
4121                                          &symbol_present, &var_present,
4122                                          &offset, depends_on));
4123       cost.cost /= avg_loop_niter (data->current_loop);
4124       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4125     }
4126
4127   if (inv_expr_id)
4128     {
4129       *inv_expr_id =
4130           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4131       /* Clear depends on.  */
4132       if (*inv_expr_id != -1 && depends_on && *depends_on)
4133         bitmap_clear (*depends_on);
4134     }
4135
4136   /* If we are after the increment, the value of the candidate is higher by
4137      one iteration.  */
4138   if (stmt_is_after_inc)
4139     offset -= ratio * cstepi;
4140
4141   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4142      (symbol/var1/const parts may be omitted).  If we are looking for an
4143      address, find the cost of addressing this.  */
4144   if (address_p)
4145     return add_costs (cost,
4146                       get_address_cost (symbol_present, var_present,
4147                                         offset, ratio, cstepi,
4148                                         mem_mode,
4149                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4150                                         speed, stmt_is_after_inc,
4151                                         can_autoinc));
4152
4153   /* Otherwise estimate the costs for computing the expression.  */
4154   if (!symbol_present && !var_present && !offset)
4155     {
4156       if (ratio != 1)
4157         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4158       return cost;
4159     }
4160
4161   /* Symbol + offset should be compile-time computable so consider that they
4162       are added once to the variable, if present.  */
4163   if (var_present && (symbol_present || offset))
4164     cost.cost += adjust_setup_cost (data,
4165                                     add_cost (speed, TYPE_MODE (ctype)));
4166
4167   /* Having offset does not affect runtime cost in case it is added to
4168      symbol, but it increases complexity.  */
4169   if (offset)
4170     cost.complexity++;
4171
4172   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4173
4174   aratio = ratio > 0 ? ratio : -ratio;
4175   if (aratio != 1)
4176     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4177   return cost;
4178
4179 fallback:
4180   if (can_autoinc)
4181     *can_autoinc = false;
4182
4183   {
4184     /* Just get the expression, expand it and measure the cost.  */
4185     tree comp = get_computation_at (data->current_loop, use, cand, at);
4186
4187     if (!comp)
4188       return infinite_cost;
4189
4190     if (address_p)
4191       comp = build_simple_mem_ref (comp);
4192
4193     return new_cost (computation_cost (comp, speed), 0);
4194   }
4195 }
4196
4197 /* Determines the cost of the computation by that USE is expressed
4198    from induction variable CAND.  If ADDRESS_P is true, we just need
4199    to create an address from it, otherwise we want to get it into
4200    register.  A set of invariants we depend on is stored in
4201    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4202    autoinc addressing is likely.  */
4203
4204 static comp_cost
4205 get_computation_cost (struct ivopts_data *data,
4206                       struct iv_use *use, struct iv_cand *cand,
4207                       bool address_p, bitmap *depends_on,
4208                       bool *can_autoinc, int *inv_expr_id)
4209 {
4210   return get_computation_cost_at (data,
4211                                   use, cand, address_p, depends_on, use->stmt,
4212                                   can_autoinc, inv_expr_id);
4213 }
4214
4215 /* Determines cost of basing replacement of USE on CAND in a generic
4216    expression.  */
4217
4218 static bool
4219 determine_use_iv_cost_generic (struct ivopts_data *data,
4220                                struct iv_use *use, struct iv_cand *cand)
4221 {
4222   bitmap depends_on;
4223   comp_cost cost;
4224   int inv_expr_id = -1;
4225
4226   /* The simple case first -- if we need to express value of the preserved
4227      original biv, the cost is 0.  This also prevents us from counting the
4228      cost of increment twice -- once at this use and once in the cost of
4229      the candidate.  */
4230   if (cand->pos == IP_ORIGINAL
4231       && cand->incremented_at == use->stmt)
4232     {
4233       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4234                        ERROR_MARK, -1);
4235       return true;
4236     }
4237
4238   cost = get_computation_cost (data, use, cand, false, &depends_on,
4239                                NULL, &inv_expr_id);
4240
4241   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4242                    inv_expr_id);
4243
4244   return !infinite_cost_p (cost);
4245 }
4246
4247 /* Determines cost of basing replacement of USE on CAND in an address.  */
4248
4249 static bool
4250 determine_use_iv_cost_address (struct ivopts_data *data,
4251                                struct iv_use *use, struct iv_cand *cand)
4252 {
4253   bitmap depends_on;
4254   bool can_autoinc;
4255   int inv_expr_id = -1;
4256   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4257                                          &can_autoinc, &inv_expr_id);
4258
4259   if (cand->ainc_use == use)
4260     {
4261       if (can_autoinc)
4262         cost.cost -= cand->cost_step;
4263       /* If we generated the candidate solely for exploiting autoincrement
4264          opportunities, and it turns out it can't be used, set the cost to
4265          infinity to make sure we ignore it.  */
4266       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4267         cost = infinite_cost;
4268     }
4269   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4270                    inv_expr_id);
4271
4272   return !infinite_cost_p (cost);
4273 }
4274
4275 /* Computes value of candidate CAND at position AT in iteration NITER, and
4276    stores it to VAL.  */
4277
4278 static void
4279 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4280                aff_tree *val)
4281 {
4282   aff_tree step, delta, nit;
4283   struct iv *iv = cand->iv;
4284   tree type = TREE_TYPE (iv->base);
4285   tree steptype = type;
4286   if (POINTER_TYPE_P (type))
4287     steptype = sizetype;
4288
4289   tree_to_aff_combination (iv->step, steptype, &step);
4290   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4291   aff_combination_convert (&nit, steptype);
4292   aff_combination_mult (&nit, &step, &delta);
4293   if (stmt_after_increment (loop, cand, at))
4294     aff_combination_add (&delta, &step);
4295
4296   tree_to_aff_combination (iv->base, type, val);
4297   aff_combination_add (val, &delta);
4298 }
4299
4300 /* Returns period of induction variable iv.  */
4301
4302 static tree
4303 iv_period (struct iv *iv)
4304 {
4305   tree step = iv->step, period, type;
4306   tree pow2div;
4307
4308   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4309
4310   type = unsigned_type_for (TREE_TYPE (step));
4311   /* Period of the iv is lcm (step, type_range)/step -1,
4312      i.e., N*type_range/step - 1. Since type range is power
4313      of two, N == (step >> num_of_ending_zeros_binary (step),
4314      so the final result is
4315
4316        (type_range >> num_of_ending_zeros_binary (step)) - 1
4317
4318   */
4319   pow2div = num_ending_zeros (step);
4320
4321   period = build_low_bits_mask (type,
4322                                 (TYPE_PRECISION (type)
4323                                  - tree_low_cst (pow2div, 1)));
4324
4325   return period;
4326 }
4327
4328 /* Returns the comparison operator used when eliminating the iv USE.  */
4329
4330 static enum tree_code
4331 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4332 {
4333   struct loop *loop = data->current_loop;
4334   basic_block ex_bb;
4335   edge exit;
4336
4337   ex_bb = gimple_bb (use->stmt);
4338   exit = EDGE_SUCC (ex_bb, 0);
4339   if (flow_bb_inside_loop_p (loop, exit->dest))
4340     exit = EDGE_SUCC (ex_bb, 1);
4341
4342   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4343 }
4344
4345 static tree
4346 strip_wrap_conserving_type_conversions (tree exp)
4347 {
4348   while (tree_ssa_useless_type_conversion (exp)
4349          && (nowrap_type_p (TREE_TYPE (exp))
4350              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4351     exp = TREE_OPERAND (exp, 0);
4352   return exp;
4353 }
4354
4355 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4356    check for an exact match.  */
4357
4358 static bool
4359 expr_equal_p (tree e, tree what)
4360 {
4361   gimple stmt;
4362   enum tree_code code;
4363
4364   e = strip_wrap_conserving_type_conversions (e);
4365   what = strip_wrap_conserving_type_conversions (what);
4366
4367   code = TREE_CODE (what);
4368   if (TREE_TYPE (e) != TREE_TYPE (what))
4369     return false;
4370
4371   if (operand_equal_p (e, what, 0))
4372     return true;
4373
4374   if (TREE_CODE (e) != SSA_NAME)
4375     return false;
4376
4377   stmt = SSA_NAME_DEF_STMT (e);
4378   if (gimple_code (stmt) != GIMPLE_ASSIGN
4379       || gimple_assign_rhs_code (stmt) != code)
4380     return false;
4381
4382   switch (get_gimple_rhs_class (code))
4383     {
4384     case GIMPLE_BINARY_RHS:
4385       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4386         return false;
4387       /* Fallthru.  */
4388
4389     case GIMPLE_UNARY_RHS:
4390     case GIMPLE_SINGLE_RHS:
4391       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4392     default:
4393       return false;
4394     }
4395 }
4396
4397 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4398    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4399    calculation is performed in non-wrapping type.
4400
4401    TODO: More generally, we could test for the situation that
4402          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4403          This would require knowing the sign of OFFSET.
4404
4405          Also, we only look for the first addition in the computation of BASE.
4406          More complex analysis would be better, but introducing it just for
4407          this optimization seems like an overkill.  */
4408
4409 static bool
4410 difference_cannot_overflow_p (tree base, tree offset)
4411 {
4412   enum tree_code code;
4413   tree e1, e2;
4414
4415   if (!nowrap_type_p (TREE_TYPE (base)))
4416     return false;
4417
4418   base = expand_simple_operations (base);
4419
4420   if (TREE_CODE (base) == SSA_NAME)
4421     {
4422       gimple stmt = SSA_NAME_DEF_STMT (base);
4423
4424       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4425         return false;
4426
4427       code = gimple_assign_rhs_code (stmt);
4428       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4429         return false;
4430
4431       e1 = gimple_assign_rhs1 (stmt);
4432       e2 = gimple_assign_rhs2 (stmt);
4433     }
4434   else
4435     {
4436       code = TREE_CODE (base);
4437       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4438         return false;
4439       e1 = TREE_OPERAND (base, 0);
4440       e2 = TREE_OPERAND (base, 1);
4441     }
4442
4443   /* TODO: deeper inspection may be necessary to prove the equality.  */
4444   switch (code)
4445     {
4446     case PLUS_EXPR:
4447       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4448     case POINTER_PLUS_EXPR:
4449       return expr_equal_p (e2, offset);
4450
4451     default:
4452       return false;
4453     }
4454 }
4455
4456 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4457    comparison with CAND.  NITER describes the number of iterations of
4458    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4459
4460    We aim to handle the following situation:
4461
4462    sometype *base, *p;
4463    int a, b, i;
4464
4465    i = a;
4466    p = p_0 = base + a;
4467
4468    do
4469      {
4470        bla (*p);
4471        p++;
4472        i++;
4473      }
4474    while (i < b);
4475
4476    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4477    We aim to optimize this to
4478
4479    p = p_0 = base + a;
4480    do
4481      {
4482        bla (*p);
4483        p++;
4484      }
4485    while (p < p_0 - a + b);
4486
4487    This preserves the correctness, since the pointer arithmetics does not
4488    overflow.  More precisely:
4489
4490    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4491       overflow in computing it or the values of p.
4492    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4493       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4494
4495 static bool
4496 iv_elimination_compare_lt (struct ivopts_data *data,
4497                            struct iv_cand *cand, enum tree_code *comp_p,
4498                            struct tree_niter_desc *niter)
4499 {
4500   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4501   struct affine_tree_combination nit, tmpa, tmpb;
4502   enum tree_code comp;
4503   HOST_WIDE_INT step;
4504
4505   /* We need to know that the candidate induction variable does not overflow.
4506      While more complex analysis may be used to prove this, for now just
4507      check that the variable appears in the original program and that it
4508      is computed in a type that guarantees no overflows.  */
4509   cand_type = TREE_TYPE (cand->iv->base);
4510   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4511     return false;
4512
4513   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4514      the calculation of the BOUND could overflow, making the comparison
4515      invalid.  */
4516   if (!data->loop_single_exit_p)
4517     return false;
4518
4519   /* We need to be able to decide whether candidate is increasing or decreasing
4520      in order to choose the right comparison operator.  */
4521   if (!cst_and_fits_in_hwi (cand->iv->step))
4522     return false;
4523   step = int_cst_value (cand->iv->step);
4524
4525   /* Check that the number of iterations matches the expected pattern:
4526      a + 1 > b ? 0 : b - a - 1.  */
4527   mbz = niter->may_be_zero;
4528   if (TREE_CODE (mbz) == GT_EXPR)
4529     {
4530       /* Handle a + 1 > b.  */
4531       tree op0 = TREE_OPERAND (mbz, 0);
4532       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4533         {
4534           a = TREE_OPERAND (op0, 0);
4535           b = TREE_OPERAND (mbz, 1);
4536         }
4537       else
4538         return false;
4539     }
4540   else if (TREE_CODE (mbz) == LT_EXPR)
4541     {
4542       tree op1 = TREE_OPERAND (mbz, 1);
4543
4544       /* Handle b < a + 1.  */
4545       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4546         {
4547           a = TREE_OPERAND (op1, 0);
4548           b = TREE_OPERAND (mbz, 0);
4549         }
4550       else
4551         return false;
4552     }
4553   else
4554     return false;
4555
4556   /* Expected number of iterations is B - A - 1.  Check that it matches
4557      the actual number, i.e., that B - A - NITER = 1.  */
4558   tree_to_aff_combination (niter->niter, nit_type, &nit);
4559   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4560   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4561   aff_combination_scale (&nit, double_int_minus_one);
4562   aff_combination_scale (&tmpa, double_int_minus_one);
4563   aff_combination_add (&tmpb, &tmpa);
4564   aff_combination_add (&tmpb, &nit);
4565   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4566     return false;
4567
4568   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4569      overflow.  */
4570   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4571                         cand->iv->step,
4572                         fold_convert (TREE_TYPE (cand->iv->step), a));
4573   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4574     return false;
4575
4576   /* Determine the new comparison operator.  */
4577   comp = step < 0 ? GT_EXPR : LT_EXPR;
4578   if (*comp_p == NE_EXPR)
4579     *comp_p = comp;
4580   else if (*comp_p == EQ_EXPR)
4581     *comp_p = invert_tree_comparison (comp, false);
4582   else
4583     gcc_unreachable ();
4584
4585   return true;
4586 }
4587
4588 /* Check whether it is possible to express the condition in USE by comparison
4589    of candidate CAND.  If so, store the value compared with to BOUND, and the
4590    comparison operator to COMP.  */
4591
4592 static bool
4593 may_eliminate_iv (struct ivopts_data *data,
4594                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4595                   enum tree_code *comp)
4596 {
4597   basic_block ex_bb;
4598   edge exit;
4599   tree period;
4600   struct loop *loop = data->current_loop;
4601   aff_tree bnd;
4602   struct tree_niter_desc *desc = NULL;
4603
4604   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4605     return false;
4606
4607   /* For now works only for exits that dominate the loop latch.
4608      TODO: extend to other conditions inside loop body.  */
4609   ex_bb = gimple_bb (use->stmt);
4610   if (use->stmt != last_stmt (ex_bb)
4611       || gimple_code (use->stmt) != GIMPLE_COND
4612       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4613     return false;
4614
4615   exit = EDGE_SUCC (ex_bb, 0);
4616   if (flow_bb_inside_loop_p (loop, exit->dest))
4617     exit = EDGE_SUCC (ex_bb, 1);
4618   if (flow_bb_inside_loop_p (loop, exit->dest))
4619     return false;
4620
4621   desc = niter_for_exit (data, exit);
4622   if (!desc)
4623     return false;
4624
4625   /* Determine whether we can use the variable to test the exit condition.
4626      This is the case iff the period of the induction variable is greater
4627      than the number of iterations for which the exit condition is true.  */
4628   period = iv_period (cand->iv);
4629
4630   /* If the number of iterations is constant, compare against it directly.  */
4631   if (TREE_CODE (desc->niter) == INTEGER_CST)
4632     {
4633       /* See cand_value_at.  */
4634       if (stmt_after_increment (loop, cand, use->stmt))
4635         {
4636           if (!tree_int_cst_lt (desc->niter, period))
4637             return false;
4638         }
4639       else
4640         {
4641           if (tree_int_cst_lt (period, desc->niter))
4642             return false;
4643         }
4644     }
4645
4646   /* If not, and if this is the only possible exit of the loop, see whether
4647      we can get a conservative estimate on the number of iterations of the
4648      entire loop and compare against that instead.  */
4649   else
4650     {
4651       double_int period_value, max_niter;
4652
4653       max_niter = desc->max;
4654       if (stmt_after_increment (loop, cand, use->stmt))
4655         max_niter += double_int_one;
4656       period_value = tree_to_double_int (period);
4657       if (max_niter.ugt (period_value))
4658         {
4659           /* See if we can take advantage of inferred loop bound information.  */
4660           if (data->loop_single_exit_p)
4661             {
4662               if (!max_loop_iterations (loop, &max_niter))
4663                 return false;
4664               /* The loop bound is already adjusted by adding 1.  */
4665               if (max_niter.ugt (period_value))
4666                 return false;
4667             }
4668           else
4669             return false;
4670         }
4671     }
4672
4673   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4674
4675   *bound = aff_combination_to_tree (&bnd);
4676   *comp = iv_elimination_compare (data, use);
4677
4678   /* It is unlikely that computing the number of iterations using division
4679      would be more profitable than keeping the original induction variable.  */
4680   if (expression_expensive_p (*bound))
4681     return false;
4682
4683   /* Sometimes, it is possible to handle the situation that the number of
4684      iterations may be zero unless additional assumtions by using <
4685      instead of != in the exit condition.
4686
4687      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4688            base the exit condition on it.  However, that is often too
4689            expensive.  */
4690   if (!integer_zerop (desc->may_be_zero))
4691     return iv_elimination_compare_lt (data, cand, comp, desc);
4692
4693   return true;
4694 }
4695
4696  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4697     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4698
4699 static int
4700 parm_decl_cost (struct ivopts_data *data, tree bound)
4701 {
4702   tree sbound = bound;
4703   STRIP_NOPS (sbound);
4704
4705   if (TREE_CODE (sbound) == SSA_NAME
4706       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4707       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4708       && data->body_includes_call)
4709     return COSTS_N_INSNS (1);
4710
4711   return 0;
4712 }
4713
4714 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4715
4716 static bool
4717 determine_use_iv_cost_condition (struct ivopts_data *data,
4718                                  struct iv_use *use, struct iv_cand *cand)
4719 {
4720   tree bound = NULL_TREE;
4721   struct iv *cmp_iv;
4722   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4723   comp_cost elim_cost, express_cost, cost, bound_cost;
4724   bool ok;
4725   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4726   tree *control_var, *bound_cst;
4727   enum tree_code comp = ERROR_MARK;
4728
4729   /* Only consider real candidates.  */
4730   if (!cand->iv)
4731     {
4732       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4733                        ERROR_MARK, -1);
4734       return false;
4735     }
4736
4737   /* Try iv elimination.  */
4738   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4739     {
4740       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4741       if (elim_cost.cost == 0)
4742         elim_cost.cost = parm_decl_cost (data, bound);
4743       else if (TREE_CODE (bound) == INTEGER_CST)
4744         elim_cost.cost = 0;
4745       /* If we replace a loop condition 'i < n' with 'p < base + n',
4746          depends_on_elim will have 'base' and 'n' set, which implies
4747          that both 'base' and 'n' will be live during the loop.  More likely,
4748          'base + n' will be loop invariant, resulting in only one live value
4749          during the loop.  So in that case we clear depends_on_elim and set
4750         elim_inv_expr_id instead.  */
4751       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4752         {
4753           elim_inv_expr_id = get_expr_id (data, bound);
4754           bitmap_clear (depends_on_elim);
4755         }
4756       /* The bound is a loop invariant, so it will be only computed
4757          once.  */
4758       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4759     }
4760   else
4761     elim_cost = infinite_cost;
4762
4763   /* Try expressing the original giv.  If it is compared with an invariant,
4764      note that we cannot get rid of it.  */
4765   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4766                               NULL, &cmp_iv);
4767   gcc_assert (ok);
4768
4769   /* When the condition is a comparison of the candidate IV against
4770      zero, prefer this IV.
4771
4772      TODO: The constant that we're subtracting from the cost should
4773      be target-dependent.  This information should be added to the
4774      target costs for each backend.  */
4775   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4776       && integer_zerop (*bound_cst)
4777       && (operand_equal_p (*control_var, cand->var_after, 0)
4778           || operand_equal_p (*control_var, cand->var_before, 0)))
4779     elim_cost.cost -= 1;
4780
4781   express_cost = get_computation_cost (data, use, cand, false,
4782                                        &depends_on_express, NULL,
4783                                        &express_inv_expr_id);
4784   fd_ivopts_data = data;
4785   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4786
4787   /* Count the cost of the original bound as well.  */
4788   bound_cost = force_var_cost (data, *bound_cst, NULL);
4789   if (bound_cost.cost == 0)
4790     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4791   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4792     bound_cost.cost = 0;
4793   express_cost.cost += bound_cost.cost;
4794
4795   /* Choose the better approach, preferring the eliminated IV. */
4796   if (compare_costs (elim_cost, express_cost) <= 0)
4797     {
4798       cost = elim_cost;
4799       depends_on = depends_on_elim;
4800       depends_on_elim = NULL;
4801       inv_expr_id = elim_inv_expr_id;
4802     }
4803   else
4804     {
4805       cost = express_cost;
4806       depends_on = depends_on_express;
4807       depends_on_express = NULL;
4808       bound = NULL_TREE;
4809       comp = ERROR_MARK;
4810       inv_expr_id = express_inv_expr_id;
4811     }
4812
4813   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4814
4815   if (depends_on_elim)
4816     BITMAP_FREE (depends_on_elim);
4817   if (depends_on_express)
4818     BITMAP_FREE (depends_on_express);
4819
4820   return !infinite_cost_p (cost);
4821 }
4822
4823 /* Determines cost of basing replacement of USE on CAND.  Returns false
4824    if USE cannot be based on CAND.  */
4825
4826 static bool
4827 determine_use_iv_cost (struct ivopts_data *data,
4828                        struct iv_use *use, struct iv_cand *cand)
4829 {
4830   switch (use->type)
4831     {
4832     case USE_NONLINEAR_EXPR:
4833       return determine_use_iv_cost_generic (data, use, cand);
4834
4835     case USE_ADDRESS:
4836       return determine_use_iv_cost_address (data, use, cand);
4837
4838     case USE_COMPARE:
4839       return determine_use_iv_cost_condition (data, use, cand);
4840
4841     default:
4842       gcc_unreachable ();
4843     }
4844 }
4845
4846 /* Return true if get_computation_cost indicates that autoincrement is
4847    a possibility for the pair of USE and CAND, false otherwise.  */
4848
4849 static bool
4850 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4851                            struct iv_cand *cand)
4852 {
4853   bitmap depends_on;
4854   bool can_autoinc;
4855   comp_cost cost;
4856
4857   if (use->type != USE_ADDRESS)
4858     return false;
4859
4860   cost = get_computation_cost (data, use, cand, true, &depends_on,
4861                                &can_autoinc, NULL);
4862
4863   BITMAP_FREE (depends_on);
4864
4865   return !infinite_cost_p (cost) && can_autoinc;
4866 }
4867
4868 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4869    use that allows autoincrement, and set their AINC_USE if possible.  */
4870
4871 static void
4872 set_autoinc_for_original_candidates (struct ivopts_data *data)
4873 {
4874   unsigned i, j;
4875
4876   for (i = 0; i < n_iv_cands (data); i++)
4877     {
4878       struct iv_cand *cand = iv_cand (data, i);
4879       struct iv_use *closest = NULL;
4880       if (cand->pos != IP_ORIGINAL)
4881         continue;
4882       for (j = 0; j < n_iv_uses (data); j++)
4883         {
4884           struct iv_use *use = iv_use (data, j);
4885           unsigned uid = gimple_uid (use->stmt);
4886           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4887               || uid > gimple_uid (cand->incremented_at))
4888             continue;
4889           if (closest == NULL || uid > gimple_uid (closest->stmt))
4890             closest = use;
4891         }
4892       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4893         continue;
4894       cand->ainc_use = closest;
4895     }
4896 }
4897
/* Collects the induction variable candidates for DATA.  The steps run in a
   fixed order: standard candidates, candidates for the old ivs, candidates
   derived from the uses, autoincrement marking of the original candidates
   and finally recording of the important candidates.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Commonly used ivs come first.  */
  add_standard_iv_candidates (data);

  /* Then the induction variables that already exist.  */
  add_old_ivs_candidates (data);

  /* Then the ones derived from the uses.  */
  add_derived_ivs_candidates (data);

  /* Mark original candidates incremented next to an autoinc-capable use.  */
  set_autoinc_for_original_candidates (data);

  /* Finally note which candidates are important.  */
  record_important_candidates (data);
}
4917
4918 /* Determines costs of basing the use of the iv on an iv candidate.  */
4919
4920 static void
4921 determine_use_iv_costs (struct ivopts_data *data)
4922 {
4923   unsigned i, j;
4924   struct iv_use *use;
4925   struct iv_cand *cand;
4926   bitmap to_clear = BITMAP_ALLOC (NULL);
4927
4928   alloc_use_cost_map (data);
4929
4930   for (i = 0; i < n_iv_uses (data); i++)
4931     {
4932       use = iv_use (data, i);
4933
4934       if (data->consider_all_candidates)
4935         {
4936           for (j = 0; j < n_iv_cands (data); j++)
4937             {
4938               cand = iv_cand (data, j);
4939               determine_use_iv_cost (data, use, cand);
4940             }
4941         }
4942       else
4943         {
4944           bitmap_iterator bi;
4945
4946           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4947             {
4948               cand = iv_cand (data, j);
4949               if (!determine_use_iv_cost (data, use, cand))
4950                 bitmap_set_bit (to_clear, j);
4951             }
4952
4953           /* Remove the candidates for that the cost is infinite from
4954              the list of related candidates.  */
4955           bitmap_and_compl_into (use->related_cands, to_clear);
4956           bitmap_clear (to_clear);
4957         }
4958     }
4959
4960   BITMAP_FREE (to_clear);
4961
4962   if (dump_file && (dump_flags & TDF_DETAILS))
4963     {
4964       fprintf (dump_file, "Use-candidate costs:\n");
4965
4966       for (i = 0; i < n_iv_uses (data); i++)
4967         {
4968           use = iv_use (data, i);
4969
4970           fprintf (dump_file, "Use %d:\n", i);
4971           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4972           for (j = 0; j < use->n_map_members; j++)
4973             {
4974               if (!use->cost_map[j].cand
4975                   || infinite_cost_p (use->cost_map[j].cost))
4976                 continue;
4977
4978               fprintf (dump_file, "  %d\t%d\t%d\t",
4979                        use->cost_map[j].cand->id,
4980                        use->cost_map[j].cost.cost,
4981                        use->cost_map[j].cost.complexity);
4982               if (use->cost_map[j].depends_on)
4983                 bitmap_print (dump_file,
4984                               use->cost_map[j].depends_on, "","");
4985               if (use->cost_map[j].inv_expr_id != -1)
4986                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4987               fprintf (dump_file, "\n");
4988             }
4989
4990           fprintf (dump_file, "\n");
4991         }
4992       fprintf (dump_file, "\n");
4993     }
4994 }
4995
4996 /* Determines cost of the candidate CAND.  */
4997
4998 static void
4999 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5000 {
5001   comp_cost cost_base;
5002   unsigned cost, cost_step;
5003   tree base;
5004
5005   if (!cand->iv)
5006     {
5007       cand->cost = 0;
5008       return;
5009     }
5010
5011   /* There are two costs associated with the candidate -- its increment
5012      and its initialization.  The second is almost negligible for any loop
5013      that rolls enough, so we take it just very little into account.  */
5014
5015   base = cand->iv->base;
5016   cost_base = force_var_cost (data, base, NULL);
5017   /* It will be exceptional that the iv register happens to be initialized with
5018      the proper value at no cost.  In general, there will at least be a regcopy
5019      or a const set.  */
5020   if (cost_base.cost == 0)
5021     cost_base.cost = COSTS_N_INSNS (1);
5022   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5023
5024   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5025
5026   /* Prefer the original ivs unless we may gain something by replacing it.
5027      The reason is to make debugging simpler; so this is not relevant for
5028      artificial ivs created by other optimization passes.  */
5029   if (cand->pos != IP_ORIGINAL
5030       || !SSA_NAME_VAR (cand->var_before)
5031       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5032     cost++;
5033
5034   /* Prefer not to insert statements into latch unless there are some
5035      already (so that we do not create unnecessary jumps).  */
5036   if (cand->pos == IP_END
5037       && empty_block_p (ip_end_pos (data->current_loop)))
5038     cost++;
5039
5040   cand->cost = cost;
5041   cand->cost_step = cost_step;
5042 }
5043
5044 /* Determines costs of computation of the candidates.  */
5045
5046 static void
5047 determine_iv_costs (struct ivopts_data *data)
5048 {
5049   unsigned i;
5050
5051   if (dump_file && (dump_flags & TDF_DETAILS))
5052     {
5053       fprintf (dump_file, "Candidate costs:\n");
5054       fprintf (dump_file, "  cand\tcost\n");
5055     }
5056
5057   for (i = 0; i < n_iv_cands (data); i++)
5058     {
5059       struct iv_cand *cand = iv_cand (data, i);
5060
5061       determine_iv_cost (data, cand);
5062
5063       if (dump_file && (dump_flags & TDF_DETAILS))
5064         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5065     }
5066
5067   if (dump_file && (dump_flags & TDF_DETAILS))
5068     fprintf (dump_file, "\n");
5069 }
5070
5071 /* Calculates cost for having SIZE induction variables.  */
5072
5073 static unsigned
5074 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5075 {
5076   /* We add size to the cost, so that we prefer eliminating ivs
5077      if possible.  */
5078   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5079                                             data->body_includes_call);
5080 }
5081
5082 /* For each size of the induction variable set determine the penalty.  */
5083
5084 static void
5085 determine_set_costs (struct ivopts_data *data)
5086 {
5087   unsigned j, n;
5088   gimple phi;
5089   gimple_stmt_iterator psi;
5090   tree op;
5091   struct loop *loop = data->current_loop;
5092   bitmap_iterator bi;
5093
5094   if (dump_file && (dump_flags & TDF_DETAILS))
5095     {
5096       fprintf (dump_file, "Global costs:\n");
5097       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5098       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5099       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5100       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5101     }
5102
5103   n = 0;
5104   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5105     {
5106       phi = gsi_stmt (psi);
5107       op = PHI_RESULT (phi);
5108
5109       if (virtual_operand_p (op))
5110         continue;
5111
5112       if (get_iv (data, op))
5113         continue;
5114
5115       n++;
5116     }
5117
5118   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5119     {
5120       struct version_info *info = ver_info (data, j);
5121
5122       if (info->inv_id && info->has_nonlin_use)
5123         n++;
5124     }
5125
5126   data->regs_used = n;
5127   if (dump_file && (dump_flags & TDF_DETAILS))
5128     fprintf (dump_file, "  regs_used %d\n", n);
5129
5130   if (dump_file && (dump_flags & TDF_DETAILS))
5131     {
5132       fprintf (dump_file, "  cost for size:\n");
5133       fprintf (dump_file, "  ivs\tcost\n");
5134       for (j = 0; j <= 2 * target_avail_regs; j++)
5135         fprintf (dump_file, "  %d\t%d\n", j,
5136                  ivopts_global_cost_for_size (data, j));
5137       fprintf (dump_file, "\n");
5138     }
5139 }
5140
5141 /* Returns true if A is a cheaper cost pair than B.  */
5142
5143 static bool
5144 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5145 {
5146   int cmp;
5147
5148   if (!a)
5149     return false;
5150
5151   if (!b)
5152     return true;
5153
5154   cmp = compare_costs (a->cost, b->cost);
5155   if (cmp < 0)
5156     return true;
5157
5158   if (cmp > 0)
5159     return false;
5160
5161   /* In case the costs are the same, prefer the cheaper candidate.  */
5162   if (a->cand->cost < b->cand->cost)
5163     return true;
5164
5165   return false;
5166 }
5167
5168
5169 /* Returns candidate by that USE is expressed in IVS.  */
5170
5171 static struct cost_pair *
5172 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5173 {
5174   return ivs->cand_for_use[use->id];
5175 }
5176
5177 /* Computes the cost field of IVS structure.  */
5178
5179 static void
5180 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5181 {
5182   comp_cost cost = ivs->cand_use_cost;
5183
5184   cost.cost += ivs->cand_cost;
5185
5186   cost.cost += ivopts_global_cost_for_size (data,
5187                                             ivs->n_regs + ivs->num_used_inv_expr);
5188
5189   ivs->cost = cost;
5190 }
5191
5192 /* Remove invariants in set INVS to set IVS.  */
5193
5194 static void
5195 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5196 {
5197   bitmap_iterator bi;
5198   unsigned iid;
5199
5200   if (!invs)
5201     return;
5202
5203   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5204     {
5205       ivs->n_invariant_uses[iid]--;
5206       if (ivs->n_invariant_uses[iid] == 0)
5207         ivs->n_regs--;
5208     }
5209 }
5210
5211 /* Set USE not to be expressed by any candidate in IVS.  */
5212
5213 static void
5214 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5215                  struct iv_use *use)
5216 {
5217   unsigned uid = use->id, cid;
5218   struct cost_pair *cp;
5219
5220   cp = ivs->cand_for_use[uid];
5221   if (!cp)
5222     return;
5223   cid = cp->cand->id;
5224
5225   ivs->bad_uses++;
5226   ivs->cand_for_use[uid] = NULL;
5227   ivs->n_cand_uses[cid]--;
5228
5229   if (ivs->n_cand_uses[cid] == 0)
5230     {
5231       bitmap_clear_bit (ivs->cands, cid);
5232       /* Do not count the pseudocandidates.  */
5233       if (cp->cand->iv)
5234         ivs->n_regs--;
5235       ivs->n_cands--;
5236       ivs->cand_cost -= cp->cand->cost;
5237
5238       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5239     }
5240
5241   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5242
5243   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5244
5245   if (cp->inv_expr_id != -1)
5246     {
5247       ivs->used_inv_expr[cp->inv_expr_id]--;
5248       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5249         ivs->num_used_inv_expr--;
5250     }
5251   iv_ca_recount_cost (data, ivs);
5252 }
5253
5254 /* Add invariants in set INVS to set IVS.  */
5255
5256 static void
5257 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5258 {
5259   bitmap_iterator bi;
5260   unsigned iid;
5261
5262   if (!invs)
5263     return;
5264
5265   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5266     {
5267       ivs->n_invariant_uses[iid]++;
5268       if (ivs->n_invariant_uses[iid] == 1)
5269         ivs->n_regs++;
5270     }
5271 }
5272
5273 /* Set cost pair for USE in set IVS to CP.  */
5274
5275 static void
5276 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5277               struct iv_use *use, struct cost_pair *cp)
5278 {
5279   unsigned uid = use->id, cid;
5280
5281   if (ivs->cand_for_use[uid] == cp)
5282     return;
5283
5284   if (ivs->cand_for_use[uid])
5285     iv_ca_set_no_cp (data, ivs, use);
5286
5287   if (cp)
5288     {
5289       cid = cp->cand->id;
5290
5291       ivs->bad_uses--;
5292       ivs->cand_for_use[uid] = cp;
5293       ivs->n_cand_uses[cid]++;
5294       if (ivs->n_cand_uses[cid] == 1)
5295         {
5296           bitmap_set_bit (ivs->cands, cid);
5297           /* Do not count the pseudocandidates.  */
5298           if (cp->cand->iv)
5299             ivs->n_regs++;
5300           ivs->n_cands++;
5301           ivs->cand_cost += cp->cand->cost;
5302
5303           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5304         }
5305
5306       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5307       iv_ca_set_add_invariants (ivs, cp->depends_on);
5308
5309       if (cp->inv_expr_id != -1)
5310         {
5311           ivs->used_inv_expr[cp->inv_expr_id]++;
5312           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5313             ivs->num_used_inv_expr++;
5314         }
5315       iv_ca_recount_cost (data, ivs);
5316     }
5317 }
5318
5319 /* Extend set IVS by expressing USE by some of the candidates in it
5320    if possible. All important candidates will be considered
5321    if IMPORTANT_CANDIDATES is true.  */
5322
5323 static void
5324 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5325                struct iv_use *use, bool important_candidates)
5326 {
5327   struct cost_pair *best_cp = NULL, *cp;
5328   bitmap_iterator bi;
5329   bitmap cands;
5330   unsigned i;
5331
5332   gcc_assert (ivs->upto >= use->id);
5333
5334   if (ivs->upto == use->id)
5335     {
5336       ivs->upto++;
5337       ivs->bad_uses++;
5338     }
5339
5340   cands = (important_candidates ? data->important_candidates : ivs->cands);
5341   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5342     {
5343       struct iv_cand *cand = iv_cand (data, i);
5344
5345       cp = get_use_iv_cost (data, use, cand);
5346
5347       if (cheaper_cost_pair (cp, best_cp))
5348         best_cp = cp;
5349     }
5350
5351   iv_ca_set_cp (data, ivs, use, best_cp);
5352 }
5353
5354 /* Get cost for assignment IVS.  */
5355
5356 static comp_cost
5357 iv_ca_cost (struct iv_ca *ivs)
5358 {
5359   /* This was a conditional expression but it triggered a bug in
5360      Sun C 5.5.  */
5361   if (ivs->bad_uses)
5362     return infinite_cost;
5363   else
5364     return ivs->cost;
5365 }
5366
5367 /* Returns true if all dependences of CP are among invariants in IVS.  */
5368
5369 static bool
5370 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5371 {
5372   unsigned i;
5373   bitmap_iterator bi;
5374
5375   if (!cp->depends_on)
5376     return true;
5377
5378   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5379     {
5380       if (ivs->n_invariant_uses[i] == 0)
5381         return false;
5382     }
5383
5384   return true;
5385 }
5386
5387 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5388    it before NEXT_CHANGE.  */
5389
5390 static struct iv_ca_delta *
5391 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5392                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5393 {
5394   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5395
5396   change->use = use;
5397   change->old_cp = old_cp;
5398   change->new_cp = new_cp;
5399   change->next_change = next_change;
5400
5401   return change;
5402 }
5403
5404 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5405    are rewritten.  */
5406
5407 static struct iv_ca_delta *
5408 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5409 {
5410   struct iv_ca_delta *last;
5411
5412   if (!l2)
5413     return l1;
5414
5415   if (!l1)
5416     return l2;
5417
5418   for (last = l1; last->next_change; last = last->next_change)
5419     continue;
5420   last->next_change = l2;
5421
5422   return l1;
5423 }
5424
5425 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5426
5427 static struct iv_ca_delta *
5428 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5429 {
5430   struct iv_ca_delta *act, *next, *prev = NULL;
5431   struct cost_pair *tmp;
5432
5433   for (act = delta; act; act = next)
5434     {
5435       next = act->next_change;
5436       act->next_change = prev;
5437       prev = act;
5438
5439       tmp = act->old_cp;
5440       act->old_cp = act->new_cp;
5441       act->new_cp = tmp;
5442     }
5443
5444   return prev;
5445 }
5446
5447 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5448    reverted instead.  */
5449
5450 static void
5451 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5452                     struct iv_ca_delta *delta, bool forward)
5453 {
5454   struct cost_pair *from, *to;
5455   struct iv_ca_delta *act;
5456
5457   if (!forward)
5458     delta = iv_ca_delta_reverse (delta);
5459
5460   for (act = delta; act; act = act->next_change)
5461     {
5462       from = act->old_cp;
5463       to = act->new_cp;
5464       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5465       iv_ca_set_cp (data, ivs, act->use, to);
5466     }
5467
5468   if (!forward)
5469     iv_ca_delta_reverse (delta);
5470 }
5471
5472 /* Returns true if CAND is used in IVS.  */
5473
5474 static bool
5475 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5476 {
5477   return ivs->n_cand_uses[cand->id] > 0;
5478 }
5479
5480 /* Returns number of induction variable candidates in the set IVS.  */
5481
5482 static unsigned
5483 iv_ca_n_cands (struct iv_ca *ivs)
5484 {
5485   return ivs->n_cands;
5486 }
5487
5488 /* Free the list of changes DELTA.  */
5489
5490 static void
5491 iv_ca_delta_free (struct iv_ca_delta **delta)
5492 {
5493   struct iv_ca_delta *act, *next;
5494
5495   for (act = *delta; act; act = next)
5496     {
5497       next = act->next_change;
5498       free (act);
5499     }
5500
5501   *delta = NULL;
5502 }
5503
5504 /* Allocates new iv candidates assignment.  */
5505
5506 static struct iv_ca *
5507 iv_ca_new (struct ivopts_data *data)
5508 {
5509   struct iv_ca *nw = XNEW (struct iv_ca);
5510
5511   nw->upto = 0;
5512   nw->bad_uses = 0;
5513   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5514   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5515   nw->cands = BITMAP_ALLOC (NULL);
5516   nw->n_cands = 0;
5517   nw->n_regs = 0;
5518   nw->cand_use_cost = no_cost;
5519   nw->cand_cost = 0;
5520   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5521   nw->cost = no_cost;
5522   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5523   nw->num_used_inv_expr = 0;
5524
5525   return nw;
5526 }
5527
5528 /* Free memory occupied by the set IVS.  */
5529
5530 static void
5531 iv_ca_free (struct iv_ca **ivs)
5532 {
5533   free ((*ivs)->cand_for_use);
5534   free ((*ivs)->n_cand_uses);
5535   BITMAP_FREE ((*ivs)->cands);
5536   free ((*ivs)->n_invariant_uses);
5537   free ((*ivs)->used_inv_expr);
5538   free (*ivs);
5539   *ivs = NULL;
5540 }
5541
5542 /* Dumps IVS to FILE.  */
5543
5544 static void
5545 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5546 {
5547   const char *pref = "  invariants ";
5548   unsigned i;
5549   comp_cost cost = iv_ca_cost (ivs);
5550
5551   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5552   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5553            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5554   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5555
5556    for (i = 0; i < ivs->upto; i++)
5557     {
5558       struct iv_use *use = iv_use (data, i);
5559       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5560       if (cp)
5561         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5562                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5563       else
5564         fprintf (file, "   use:%d --> ??\n", use->id);
5565     }
5566
5567   for (i = 1; i <= data->max_inv_id; i++)
5568     if (ivs->n_invariant_uses[i])
5569       {
5570         fprintf (file, "%s%d", pref, i);
5571         pref = ", ";
5572       }
5573   fprintf (file, "\n\n");
5574 }
5575
5576 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5577    new set, and store differences in DELTA.  Number of induction variables
5578    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5579    the function will try to find a solution with mimimal iv candidates.  */
5580
5581 static comp_cost
5582 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5583               struct iv_cand *cand, struct iv_ca_delta **delta,
5584               unsigned *n_ivs, bool min_ncand)
5585 {
5586   unsigned i;
5587   comp_cost cost;
5588   struct iv_use *use;
5589   struct cost_pair *old_cp, *new_cp;
5590
5591   *delta = NULL;
5592   for (i = 0; i < ivs->upto; i++)
5593     {
5594       use = iv_use (data, i);
5595       old_cp = iv_ca_cand_for_use (ivs, use);
5596
5597       if (old_cp
5598           && old_cp->cand == cand)
5599         continue;
5600
5601       new_cp = get_use_iv_cost (data, use, cand);
5602       if (!new_cp)
5603         continue;
5604
5605       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5606         continue;
5607
5608       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5609         continue;
5610
5611       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5612     }
5613
5614   iv_ca_delta_commit (data, ivs, *delta, true);
5615   cost = iv_ca_cost (ivs);
5616   if (n_ivs)
5617     *n_ivs = iv_ca_n_cands (ivs);
5618   iv_ca_delta_commit (data, ivs, *delta, false);
5619
5620   return cost;
5621 }
5622
5623 /* Try narrowing set IVS by removing CAND.  Return the cost of
5624    the new set and store the differences in DELTA.  */
5625
5626 static comp_cost
5627 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5628               struct iv_cand *cand, struct iv_ca_delta **delta)
5629 {
5630   unsigned i, ci;
5631   struct iv_use *use;
5632   struct cost_pair *old_cp, *new_cp, *cp;
5633   bitmap_iterator bi;
5634   struct iv_cand *cnd;
5635   comp_cost cost;
5636
5637   *delta = NULL;
5638   for (i = 0; i < n_iv_uses (data); i++)
5639     {
5640       use = iv_use (data, i);
5641
5642       old_cp = iv_ca_cand_for_use (ivs, use);
5643       if (old_cp->cand != cand)
5644         continue;
5645
5646       new_cp = NULL;
5647
5648       if (data->consider_all_candidates)
5649         {
5650           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5651             {
5652               if (ci == cand->id)
5653                 continue;
5654
5655               cnd = iv_cand (data, ci);
5656
5657               cp = get_use_iv_cost (data, use, cnd);
5658               if (!cp)
5659                 continue;
5660
5661               if (!iv_ca_has_deps (ivs, cp))
5662                 continue;
5663
5664               if (!cheaper_cost_pair (cp, new_cp))
5665                 continue;
5666
5667               new_cp = cp;
5668             }
5669         }
5670       else
5671         {
5672           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5673             {
5674               if (ci == cand->id)
5675                 continue;
5676
5677               cnd = iv_cand (data, ci);
5678
5679               cp = get_use_iv_cost (data, use, cnd);
5680               if (!cp)
5681                 continue;
5682               if (!iv_ca_has_deps (ivs, cp))
5683                 continue;
5684
5685               if (!cheaper_cost_pair (cp, new_cp))
5686                 continue;
5687
5688               new_cp = cp;
5689             }
5690         }
5691
5692       if (!new_cp)
5693         {
5694           iv_ca_delta_free (delta);
5695           return infinite_cost;
5696         }
5697
5698       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5699     }
5700
5701   iv_ca_delta_commit (data, ivs, *delta, true);
5702   cost = iv_ca_cost (ivs);
5703   iv_ca_delta_commit (data, ivs, *delta, false);
5704
5705   return cost;
5706 }
5707
5708 /* Try optimizing the set of candidates IVS by removing candidates different
5709    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5710    differences in DELTA.  */
5711
5712 static comp_cost
5713 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5714              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5715 {
5716   bitmap_iterator bi;
5717   struct iv_ca_delta *act_delta, *best_delta;
5718   unsigned i;
5719   comp_cost best_cost, acost;
5720   struct iv_cand *cand;
5721
5722   best_delta = NULL;
5723   best_cost = iv_ca_cost (ivs);
5724
5725   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5726     {
5727       cand = iv_cand (data, i);
5728
5729       if (cand == except_cand)
5730         continue;
5731
5732       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5733
5734       if (compare_costs (acost, best_cost) < 0)
5735         {
5736           best_cost = acost;
5737           iv_ca_delta_free (&best_delta);
5738           best_delta = act_delta;
5739         }
5740       else
5741         iv_ca_delta_free (&act_delta);
5742     }
5743
5744   if (!best_delta)
5745     {
5746       *delta = NULL;
5747       return best_cost;
5748     }
5749
5750   /* Recurse to possibly remove other unnecessary ivs.  */
5751   iv_ca_delta_commit (data, ivs, best_delta, true);
5752   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5753   iv_ca_delta_commit (data, ivs, best_delta, false);
5754   *delta = iv_ca_delta_join (best_delta, *delta);
5755   return best_cost;
5756 }
5757
5758 /* Tries to extend the sets IVS in the best possible way in order
5759    to express the USE.  If ORIGINALP is true, prefer candidates from
5760    the original set of IVs, otherwise favor important candidates not
5761    based on any memory object.  */
5762
5763 static bool
5764 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5765                   struct iv_use *use, bool originalp)
5766 {
5767   comp_cost best_cost, act_cost;
5768   unsigned i;
5769   bitmap_iterator bi;
5770   struct iv_cand *cand;
5771   struct iv_ca_delta *best_delta = NULL, *act_delta;
5772   struct cost_pair *cp;
5773
5774   iv_ca_add_use (data, ivs, use, false);
5775   best_cost = iv_ca_cost (ivs);
5776
5777   cp = iv_ca_cand_for_use (ivs, use);
5778   if (!cp)
5779     {
5780       ivs->upto--;
5781       ivs->bad_uses--;
5782       iv_ca_add_use (data, ivs, use, true);
5783       best_cost = iv_ca_cost (ivs);
5784       cp = iv_ca_cand_for_use (ivs, use);
5785     }
5786   if (cp)
5787     {
5788       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5789       iv_ca_set_no_cp (data, ivs, use);
5790     }
5791
5792   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5793      first try important candidates not based on any memory object.  Only if
5794      this fails, try the specific ones.  Rationale -- in loops with many
5795      variables the best choice often is to use just one generic biv.  If we
5796      added here many ivs specific to the uses, the optimization algorithm later
5797      would be likely to get stuck in a local minimum, thus causing us to create
5798      too many ivs.  The approach from few ivs to more seems more likely to be
5799      successful -- starting from few ivs, replacing an expensive use by a
5800      specific iv should always be a win.  */
5801   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5802     {
5803       cand = iv_cand (data, i);
5804
5805       if (originalp && cand->pos !=IP_ORIGINAL)
5806         continue;
5807
5808       if (!originalp && cand->iv->base_object != NULL_TREE)
5809         continue;
5810
5811       if (iv_ca_cand_used_p (ivs, cand))
5812         continue;
5813
5814       cp = get_use_iv_cost (data, use, cand);
5815       if (!cp)
5816         continue;
5817
5818       iv_ca_set_cp (data, ivs, use, cp);
5819       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5820                                true);
5821       iv_ca_set_no_cp (data, ivs, use);
5822       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5823
5824       if (compare_costs (act_cost, best_cost) < 0)
5825         {
5826           best_cost = act_cost;
5827
5828           iv_ca_delta_free (&best_delta);
5829           best_delta = act_delta;
5830         }
5831       else
5832         iv_ca_delta_free (&act_delta);
5833     }
5834
5835   if (infinite_cost_p (best_cost))
5836     {
5837       for (i = 0; i < use->n_map_members; i++)
5838         {
5839           cp = use->cost_map + i;
5840           cand = cp->cand;
5841           if (!cand)
5842             continue;
5843
5844           /* Already tried this.  */
5845           if (cand->important)
5846             {
5847               if (originalp && cand->pos == IP_ORIGINAL)
5848                 continue;
5849               if (!originalp && cand->iv->base_object == NULL_TREE)
5850                 continue;
5851             }
5852
5853           if (iv_ca_cand_used_p (ivs, cand))
5854             continue;
5855
5856           act_delta = NULL;
5857           iv_ca_set_cp (data, ivs, use, cp);
5858           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5859           iv_ca_set_no_cp (data, ivs, use);
5860           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5861                                        cp, act_delta);
5862
5863           if (compare_costs (act_cost, best_cost) < 0)
5864             {
5865               best_cost = act_cost;
5866
5867               if (best_delta)
5868                 iv_ca_delta_free (&best_delta);
5869               best_delta = act_delta;
5870             }
5871           else
5872             iv_ca_delta_free (&act_delta);
5873         }
5874     }
5875
5876   iv_ca_delta_commit (data, ivs, best_delta, true);
5877   iv_ca_delta_free (&best_delta);
5878
5879   return !infinite_cost_p (best_cost);
5880 }
5881
5882 /* Finds an initial assignment of candidates to uses.  */
5883
5884 static struct iv_ca *
5885 get_initial_solution (struct ivopts_data *data, bool originalp)
5886 {
5887   struct iv_ca *ivs = iv_ca_new (data);
5888   unsigned i;
5889
5890   for (i = 0; i < n_iv_uses (data); i++)
5891     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5892       {
5893         iv_ca_free (&ivs);
5894         return NULL;
5895       }
5896
5897   return ivs;
5898 }
5899
5900 /* Tries to improve set of induction variables IVS.  */
5901
5902 static bool
5903 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5904 {
5905   unsigned i, n_ivs;
5906   comp_cost acost, best_cost = iv_ca_cost (ivs);
5907   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5908   struct iv_cand *cand;
5909
5910   /* Try extending the set of induction variables by one.  */
5911   for (i = 0; i < n_iv_cands (data); i++)
5912     {
5913       cand = iv_cand (data, i);
5914
5915       if (iv_ca_cand_used_p (ivs, cand))
5916         continue;
5917
5918       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5919       if (!act_delta)
5920         continue;
5921
5922       /* If we successfully added the candidate and the set is small enough,
5923          try optimizing it by removing other candidates.  */
5924       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5925         {
5926           iv_ca_delta_commit (data, ivs, act_delta, true);
5927           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5928           iv_ca_delta_commit (data, ivs, act_delta, false);
5929           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5930         }
5931
5932       if (compare_costs (acost, best_cost) < 0)
5933         {
5934           best_cost = acost;
5935           iv_ca_delta_free (&best_delta);
5936           best_delta = act_delta;
5937         }
5938       else
5939         iv_ca_delta_free (&act_delta);
5940     }
5941
5942   if (!best_delta)
5943     {
5944       /* Try removing the candidates from the set instead.  */
5945       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5946
5947       /* Nothing more we can do.  */
5948       if (!best_delta)
5949         return false;
5950     }
5951
5952   iv_ca_delta_commit (data, ivs, best_delta, true);
5953   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5954   iv_ca_delta_free (&best_delta);
5955   return true;
5956 }
5957
5958 /* Attempts to find the optimal set of induction variables.  We do simple
5959    greedy heuristic -- we try to replace at most one candidate in the selected
5960    solution and remove the unused ivs while this improves the cost.  */
5961
5962 static struct iv_ca *
5963 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5964 {
5965   struct iv_ca *set;
5966
5967   /* Get the initial solution.  */
5968   set = get_initial_solution (data, originalp);
5969   if (!set)
5970     {
5971       if (dump_file && (dump_flags & TDF_DETAILS))
5972         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5973       return NULL;
5974     }
5975
5976   if (dump_file && (dump_flags & TDF_DETAILS))
5977     {
5978       fprintf (dump_file, "Initial set of candidates:\n");
5979       iv_ca_dump (data, dump_file, set);
5980     }
5981
5982   while (try_improve_iv_set (data, set))
5983     {
5984       if (dump_file && (dump_flags & TDF_DETAILS))
5985         {
5986           fprintf (dump_file, "Improved to:\n");
5987           iv_ca_dump (data, dump_file, set);
5988         }
5989     }
5990
5991   return set;
5992 }
5993
5994 static struct iv_ca *
5995 find_optimal_iv_set (struct ivopts_data *data)
5996 {
5997   unsigned i;
5998   struct iv_ca *set, *origset;
5999   struct iv_use *use;
6000   comp_cost cost, origcost;
6001
6002   /* Determine the cost based on a strategy that starts with original IVs,
6003      and try again using a strategy that prefers candidates not based
6004      on any IVs.  */
6005   origset = find_optimal_iv_set_1 (data, true);
6006   set = find_optimal_iv_set_1 (data, false);
6007
6008   if (!origset && !set)
6009     return NULL;
6010
6011   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6012   cost = set ? iv_ca_cost (set) : infinite_cost;
6013
6014   if (dump_file && (dump_flags & TDF_DETAILS))
6015     {
6016       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6017                origcost.cost, origcost.complexity);
6018       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6019                cost.cost, cost.complexity);
6020     }
6021
6022   /* Choose the one with the best cost.  */
6023   if (compare_costs (origcost, cost) <= 0)
6024     {
6025       if (set)
6026         iv_ca_free (&set);
6027       set = origset;
6028     }
6029   else if (origset)
6030     iv_ca_free (&origset);
6031
6032   for (i = 0; i < n_iv_uses (data); i++)
6033     {
6034       use = iv_use (data, i);
6035       use->selected = iv_ca_cand_for_use (set, use)->cand;
6036     }
6037
6038   return set;
6039 }
6040
6041 /* Creates a new induction variable corresponding to CAND.  */
6042
6043 static void
6044 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6045 {
6046   gimple_stmt_iterator incr_pos;
6047   tree base;
6048   bool after = false;
6049
6050   if (!cand->iv)
6051     return;
6052
6053   switch (cand->pos)
6054     {
6055     case IP_NORMAL:
6056       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6057       break;
6058
6059     case IP_END:
6060       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6061       after = true;
6062       break;
6063
6064     case IP_AFTER_USE:
6065       after = true;
6066       /* fall through */
6067     case IP_BEFORE_USE:
6068       incr_pos = gsi_for_stmt (cand->incremented_at);
6069       break;
6070
6071     case IP_ORIGINAL:
6072       /* Mark that the iv is preserved.  */
6073       name_info (data, cand->var_before)->preserve_biv = true;
6074       name_info (data, cand->var_after)->preserve_biv = true;
6075
6076       /* Rewrite the increment so that it uses var_before directly.  */
6077       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6078       return;
6079     }
6080
6081   gimple_add_tmp_var (cand->var_before);
6082
6083   base = unshare_expr (cand->iv->base);
6084
6085   create_iv (base, unshare_expr (cand->iv->step),
6086              cand->var_before, data->current_loop,
6087              &incr_pos, after, &cand->var_before, &cand->var_after);
6088 }
6089
6090 /* Creates new induction variables described in SET.  */
6091
6092 static void
6093 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6094 {
6095   unsigned i;
6096   struct iv_cand *cand;
6097   bitmap_iterator bi;
6098
6099   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6100     {
6101       cand = iv_cand (data, i);
6102       create_new_iv (data, cand);
6103     }
6104
6105   if (dump_file && (dump_flags & TDF_DETAILS))
6106     {
6107       fprintf (dump_file, "\nSelected IV set: \n");
6108       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6109         {
6110           cand = iv_cand (data, i);
6111           dump_cand (dump_file, cand);
6112         }
6113       fprintf (dump_file, "\n");
6114     }
6115 }
6116
6117 /* Rewrites USE (definition of iv used in a nonlinear expression)
6118    using candidate CAND.  */
6119
6120 static void
6121 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6122                             struct iv_use *use, struct iv_cand *cand)
6123 {
6124   tree comp;
6125   tree op, tgt;
6126   gimple ass;
6127   gimple_stmt_iterator bsi;
6128
6129   /* An important special case -- if we are asked to express value of
6130      the original iv by itself, just exit; there is no need to
6131      introduce a new computation (that might also need casting the
6132      variable to unsigned and back).  */
6133   if (cand->pos == IP_ORIGINAL
6134       && cand->incremented_at == use->stmt)
6135     {
6136       enum tree_code stmt_code;
6137
6138       gcc_assert (is_gimple_assign (use->stmt));
6139       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6140
6141       /* Check whether we may leave the computation unchanged.
6142          This is the case only if it does not rely on other
6143          computations in the loop -- otherwise, the computation
6144          we rely upon may be removed in remove_unused_ivs,
6145          thus leading to ICE.  */
6146       stmt_code = gimple_assign_rhs_code (use->stmt);
6147       if (stmt_code == PLUS_EXPR
6148           || stmt_code == MINUS_EXPR
6149           || stmt_code == POINTER_PLUS_EXPR)
6150         {
6151           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6152             op = gimple_assign_rhs2 (use->stmt);
6153           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6154             op = gimple_assign_rhs1 (use->stmt);
6155           else
6156             op = NULL_TREE;
6157         }
6158       else
6159         op = NULL_TREE;
6160
6161       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6162         return;
6163     }
6164
6165   comp = get_computation (data->current_loop, use, cand);
6166   gcc_assert (comp != NULL_TREE);
6167
6168   switch (gimple_code (use->stmt))
6169     {
6170     case GIMPLE_PHI:
6171       tgt = PHI_RESULT (use->stmt);
6172
6173       /* If we should keep the biv, do not replace it.  */
6174       if (name_info (data, tgt)->preserve_biv)
6175         return;
6176
6177       bsi = gsi_after_labels (gimple_bb (use->stmt));
6178       break;
6179
6180     case GIMPLE_ASSIGN:
6181       tgt = gimple_assign_lhs (use->stmt);
6182       bsi = gsi_for_stmt (use->stmt);
6183       break;
6184
6185     default:
6186       gcc_unreachable ();
6187     }
6188
6189   if (!valid_gimple_rhs_p (comp)
6190       || (gimple_code (use->stmt) != GIMPLE_PHI
6191           /* We can't allow re-allocating the stmt as it might be pointed
6192              to still.  */
6193           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6194               >= gimple_num_ops (gsi_stmt (bsi)))))
6195     {
6196       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6197                                        true, GSI_SAME_STMT);
6198       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6199         {
6200           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6201           /* As this isn't a plain copy we have to reset alignment
6202              information.  */
6203           if (SSA_NAME_PTR_INFO (comp))
6204             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6205         }
6206     }
6207
6208   if (gimple_code (use->stmt) == GIMPLE_PHI)
6209     {
6210       ass = gimple_build_assign (tgt, comp);
6211       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6212
6213       bsi = gsi_for_stmt (use->stmt);
6214       remove_phi_node (&bsi, false);
6215     }
6216   else
6217     {
6218       gimple_assign_set_rhs_from_tree (&bsi, comp);
6219       use->stmt = gsi_stmt (bsi);
6220     }
6221 }
6222
6223 /* Performs a peephole optimization to reorder the iv update statement with
6224    a mem ref to enable instruction combining in later phases. The mem ref uses
6225    the iv value before the update, so the reordering transformation requires
6226    adjustment of the offset. CAND is the selected IV_CAND.
6227
6228    Example:
6229
6230    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6231    iv2 = iv1 + 1;
6232
6233    if (t < val)      (1)
6234      goto L;
6235    goto Head;
6236
6237
6238    directly propagating t over to (1) will introduce overlapping live range
6239    thus increase register pressure. This peephole transform it into:
6240
6241
6242    iv2 = iv1 + 1;
6243    t = MEM_REF (base, iv2, 8, 8);
6244    if (t < val)
6245      goto L;
6246    goto Head;
6247 */
6248
6249 static void
6250 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6251 {
6252   tree var_after;
6253   gimple iv_update, stmt;
6254   basic_block bb;
6255   gimple_stmt_iterator gsi, gsi_iv;
6256
6257   if (cand->pos != IP_NORMAL)
6258     return;
6259
6260   var_after = cand->var_after;
6261   iv_update = SSA_NAME_DEF_STMT (var_after);
6262
6263   bb = gimple_bb (iv_update);
6264   gsi = gsi_last_nondebug_bb (bb);
6265   stmt = gsi_stmt (gsi);
6266
6267   /* Only handle conditional statement for now.  */
6268   if (gimple_code (stmt) != GIMPLE_COND)
6269     return;
6270
6271   gsi_prev_nondebug (&gsi);
6272   stmt = gsi_stmt (gsi);
6273   if (stmt != iv_update)
6274     return;
6275
6276   gsi_prev_nondebug (&gsi);
6277   if (gsi_end_p (gsi))
6278     return;
6279
6280   stmt = gsi_stmt (gsi);
6281   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6282     return;
6283
6284   if (stmt != use->stmt)
6285     return;
6286
6287   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6288     return;
6289
6290   if (dump_file && (dump_flags & TDF_DETAILS))
6291     {
6292       fprintf (dump_file, "Reordering \n");
6293       print_gimple_stmt (dump_file, iv_update, 0, 0);
6294       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6295       fprintf (dump_file, "\n");
6296     }
6297
6298   gsi = gsi_for_stmt (use->stmt);
6299   gsi_iv = gsi_for_stmt (iv_update);
6300   gsi_move_before (&gsi_iv, &gsi);
6301
6302   cand->pos = IP_BEFORE_USE;
6303   cand->incremented_at = use->stmt;
6304 }
6305
6306 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6307
6308 static void
6309 rewrite_use_address (struct ivopts_data *data,
6310                      struct iv_use *use, struct iv_cand *cand)
6311 {
6312   aff_tree aff;
6313   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6314   tree base_hint = NULL_TREE;
6315   tree ref, iv;
6316   bool ok;
6317
6318   adjust_iv_update_pos (cand, use);
6319   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6320   gcc_assert (ok);
6321   unshare_aff_combination (&aff);
6322
6323   /* To avoid undefined overflow problems, all IV candidates use unsigned
6324      integer types.  The drawback is that this makes it impossible for
6325      create_mem_ref to distinguish an IV that is based on a memory object
6326      from one that represents simply an offset.
6327
6328      To work around this problem, we pass a hint to create_mem_ref that
6329      indicates which variable (if any) in aff is an IV based on a memory
6330      object.  Note that we only consider the candidate.  If this is not
6331      based on an object, the base of the reference is in some subexpression
6332      of the use -- but these will use pointer types, so they are recognized
6333      by the create_mem_ref heuristics anyway.  */
6334   if (cand->iv->base_object)
6335     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6336
6337   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6338   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6339                         reference_alias_ptr_type (*use->op_p),
6340                         iv, base_hint, data->speed);
6341   copy_ref_info (ref, *use->op_p);
6342   *use->op_p = ref;
6343 }
6344
6345 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6346    candidate CAND.  */
6347
6348 static void
6349 rewrite_use_compare (struct ivopts_data *data,
6350                      struct iv_use *use, struct iv_cand *cand)
6351 {
6352   tree comp, *var_p, op, bound;
6353   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6354   enum tree_code compare;
6355   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6356   bool ok;
6357
6358   bound = cp->value;
6359   if (bound)
6360     {
6361       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6362       tree var_type = TREE_TYPE (var);
6363       gimple_seq stmts;
6364
6365       if (dump_file && (dump_flags & TDF_DETAILS))
6366         {
6367           fprintf (dump_file, "Replacing exit test: ");
6368           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6369         }
6370       compare = cp->comp;
6371       bound = unshare_expr (fold_convert (var_type, bound));
6372       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6373       if (stmts)
6374         gsi_insert_seq_on_edge_immediate (
6375                 loop_preheader_edge (data->current_loop),
6376                 stmts);
6377
6378       gimple_cond_set_lhs (use->stmt, var);
6379       gimple_cond_set_code (use->stmt, compare);
6380       gimple_cond_set_rhs (use->stmt, op);
6381       return;
6382     }
6383
6384   /* The induction variable elimination failed; just express the original
6385      giv.  */
6386   comp = get_computation (data->current_loop, use, cand);
6387   gcc_assert (comp != NULL_TREE);
6388
6389   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6390   gcc_assert (ok);
6391
6392   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6393                                      true, GSI_SAME_STMT);
6394 }
6395
6396 /* Rewrites USE using candidate CAND.  */
6397
6398 static void
6399 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6400 {
6401   switch (use->type)
6402     {
6403       case USE_NONLINEAR_EXPR:
6404         rewrite_use_nonlinear_expr (data, use, cand);
6405         break;
6406
6407       case USE_ADDRESS:
6408         rewrite_use_address (data, use, cand);
6409         break;
6410
6411       case USE_COMPARE:
6412         rewrite_use_compare (data, use, cand);
6413         break;
6414
6415       default:
6416         gcc_unreachable ();
6417     }
6418
6419   update_stmt (use->stmt);
6420 }
6421
6422 /* Rewrite the uses using the selected induction variables.  */
6423
6424 static void
6425 rewrite_uses (struct ivopts_data *data)
6426 {
6427   unsigned i;
6428   struct iv_cand *cand;
6429   struct iv_use *use;
6430
6431   for (i = 0; i < n_iv_uses (data); i++)
6432     {
6433       use = iv_use (data, i);
6434       cand = use->selected;
6435       gcc_assert (cand);
6436
6437       rewrite_use (data, use, cand);
6438     }
6439 }
6440
/* Removes the ivs that are not used after rewriting.

   The SSA versions of all such ivs are collected in a bitmap and handed to
   release_defs_bitset at the end.  Before that, when debug statements may
   be present, debug binds that refer to a doomed iv are redirected to an
   equivalent expression computed from one of the selected candidates.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      /* An iv is removed if its step is nonzero, it has no invariant id,
         no use was recorded for it and it is not a preserved biv.  */
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          /* The rest of this body only fixes up debug binds; every
             `continue' below skips that fix-up, the name stays queued
             for removal.  */
          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1). */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              /* Build a throw-away use that carries only the iv, so that
                 get_computation_at can express it by a candidate.  */
              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              /* Pick the candidate to compute from among those selected
                 for the first (at most 64) uses.  Preference score: 4 for
                 an equal step, 2 for a base of the same machine mode, 1 for
                 a constant base; the first candidate with the highest score
                 wins.  */
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  /* Bind the expression to a debug temporary once, placed
                     at the definition of DEF (after the labels of the
                     block if DEF is defined by a PHI), and substitute the
                     temporary instead of the expression.  */
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              /* Replace DEF by COMP in every debug bind that uses it.  */
              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
6574
6575 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6576    for pointer_map_traverse.  */
6577
6578 static bool
6579 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6580                       void *data ATTRIBUTE_UNUSED)
6581 {
6582   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6583
6584   free (niter);
6585   return true;
6586 }
6587
6588 /* Frees data allocated by the optimization of a single loop.  */
6589
6590 static void
6591 free_loop_data (struct ivopts_data *data)
6592 {
6593   unsigned i, j;
6594   bitmap_iterator bi;
6595   tree obj;
6596
6597   if (data->niters)
6598     {
6599       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6600       pointer_map_destroy (data->niters);
6601       data->niters = NULL;
6602     }
6603
6604   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6605     {
6606       struct version_info *info;
6607
6608       info = ver_info (data, i);
6609       free (info->iv);
6610       info->iv = NULL;
6611       info->has_nonlin_use = false;
6612       info->preserve_biv = false;
6613       info->inv_id = 0;
6614     }
6615   bitmap_clear (data->relevant);
6616   bitmap_clear (data->important_candidates);
6617
6618   for (i = 0; i < n_iv_uses (data); i++)
6619     {
6620       struct iv_use *use = iv_use (data, i);
6621
6622       free (use->iv);
6623       BITMAP_FREE (use->related_cands);
6624       for (j = 0; j < use->n_map_members; j++)
6625         if (use->cost_map[j].depends_on)
6626           BITMAP_FREE (use->cost_map[j].depends_on);
6627       free (use->cost_map);
6628       free (use);
6629     }
6630   data->iv_uses.truncate (0);
6631
6632   for (i = 0; i < n_iv_cands (data); i++)
6633     {
6634       struct iv_cand *cand = iv_cand (data, i);
6635
6636       free (cand->iv);
6637       if (cand->depends_on)
6638         BITMAP_FREE (cand->depends_on);
6639       free (cand);
6640     }
6641   data->iv_candidates.truncate (0);
6642
6643   if (data->version_info_size < num_ssa_names)
6644     {
6645       data->version_info_size = 2 * num_ssa_names;
6646       free (data->version_info);
6647       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6648     }
6649
6650   data->max_inv_id = 0;
6651
6652   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6653     SET_DECL_RTL (obj, NULL_RTX);
6654
6655   decl_rtl_to_reset.truncate (0);
6656
6657   data->inv_expr_tab.empty ();
6658   data->inv_expr_id = 0;
6659 }
6660
6661 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6662    loop tree.  */
6663
6664 static void
6665 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6666 {
6667   free_loop_data (data);
6668   free (data->version_info);
6669   BITMAP_FREE (data->relevant);
6670   BITMAP_FREE (data->important_candidates);
6671
6672   decl_rtl_to_reset.release ();
6673   data->iv_uses.release ();
6674   data->iv_candidates.release ();
6675   data->inv_expr_tab.dispose ();
6676 }
6677
6678 /* Returns true if the loop body BODY includes any function calls.  */
6679
6680 static bool
6681 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6682 {
6683   gimple_stmt_iterator gsi;
6684   unsigned i;
6685
6686   for (i = 0; i < num_nodes; i++)
6687     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6688       {
6689         gimple stmt = gsi_stmt (gsi);
6690         if (is_gimple_call (stmt)
6691             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6692           return true;
6693       }
6694   return false;
6695 }
6696
6697 /* Optimizes the LOOP.  Returns true if anything changed.  */
6698
6699 static bool
6700 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6701 {
6702   bool changed = false;
6703   struct iv_ca *iv_ca;
6704   edge exit = single_dom_exit (loop);
6705   basic_block *body;
6706
6707   gcc_assert (!data->niters);
6708   data->current_loop = loop;
6709   data->speed = optimize_loop_for_speed_p (loop);
6710
6711   if (dump_file && (dump_flags & TDF_DETAILS))
6712     {
6713       fprintf (dump_file, "Processing loop %d\n", loop->num);
6714
6715       if (exit)
6716         {
6717           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6718                    exit->src->index, exit->dest->index);
6719           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6720           fprintf (dump_file, "\n");
6721         }
6722
6723       fprintf (dump_file, "\n");
6724     }
6725
6726   body = get_loop_body (loop);
6727   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6728   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6729   free (body);
6730
6731   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6732
6733   /* For each ssa name determines whether it behaves as an induction variable
6734      in some loop.  */
6735   if (!find_induction_variables (data))
6736     goto finish;
6737
6738   /* Finds interesting uses (item 1).  */
6739   find_interesting_uses (data);
6740   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6741     goto finish;
6742
6743   /* Finds candidates for the induction variables (item 2).  */
6744   find_iv_candidates (data);
6745
6746   /* Calculates the costs (item 3, part 1).  */
6747   determine_iv_costs (data);
6748   determine_use_iv_costs (data);
6749   determine_set_costs (data);
6750
6751   /* Find the optimal set of induction variables (item 3, part 2).  */
6752   iv_ca = find_optimal_iv_set (data);
6753   if (!iv_ca)
6754     goto finish;
6755   changed = true;
6756
6757   /* Create the new induction variables (item 4, part 1).  */
6758   create_new_ivs (data, iv_ca);
6759   iv_ca_free (&iv_ca);
6760
6761   /* Rewrite the uses (item 4, part 2).  */
6762   rewrite_uses (data);
6763
6764   /* Remove the ivs that are unused after rewriting.  */
6765   remove_unused_ivs (data);
6766
6767   /* We have changed the structure of induction variables; it might happen
6768      that definitions in the scev database refer to some of them that were
6769      eliminated.  */
6770   scev_reset ();
6771
6772 finish:
6773   free_loop_data (data);
6774
6775   return changed;
6776 }
6777
6778 /* Main entry point.  Optimizes induction variables in loops.  */
6779
6780 void
6781 tree_ssa_iv_optimize (void)
6782 {
6783   struct loop *loop;
6784   struct ivopts_data data;
6785   loop_iterator li;
6786
6787   tree_ssa_iv_optimize_init (&data);
6788
6789   /* Optimize the loops starting with the innermost ones.  */
6790   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6791     {
6792       if (dump_file && (dump_flags & TDF_DETAILS))
6793         flow_loop_dump (loop, dump_file, NULL, 1);
6794
6795       tree_ssa_iv_optimize_loop (&data, loop);
6796     }
6797
6798   tree_ssa_iv_optimize_finalize (&data);
6799 }