gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "tm_p.h"
  70 #include "basic-block.h"
  71 #include "gimple-pretty-print.h"
  72 #include "tree-ssa.h"
  73 #include "cfgloop.h"
  74 #include "tree-pass.h"
  75 #include "ggc.h"
  76 #include "insn-config.h"
  77 #include "pointer-set.h"
  78 #include "hash-table.h"
  79 #include "tree-chrec.h"
  80 #include "tree-scalar-evolution.h"
  81 #include "cfgloop.h"
  82 #include "params.h"
  83 #include "langhooks.h"
  84 #include "tree-affine.h"
  85 #include "target.h"
  86 #include "tree-inline.h"
  87 #include "tree-ssa-propagate.h"
  88 #include "expmed.h"
  89
  90 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  91    cost of different addressing modes.  This should be moved to a TBD
  92    interface between the GIMPLE and RTL worlds.  */
  93 #include "expr.h"
  94 #include "recog.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101 /* Returns the expected number of loop iterations for LOOP.
 102    The average trip count is computed from profile data if it
 103    exists. */
 104
 105 static inline HOST_WIDE_INT
 106 avg_loop_niter (struct loop *loop)
 107 {
 108   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 109   if (niter == -1)
 110     return AVG_LOOP_NITER (loop);
 111
 112   return niter;
 113 }
 114
 115 /* Representation of the induction variable.  */
 116 struct iv
 117 {
 118   tree base;            /* Initial value of the iv.  */
 119   tree base_object;     /* A memory object to that the induction variable points.  */
 120   tree step;            /* Step of the iv (constant only).  */
 121   tree ssa_name;        /* The ssa name with the value.  */
 122   bool biv_p;           /* Is it a biv?  */
 123   bool have_use_for;    /* Do we already have a use for it?  */
 124   unsigned use_id;      /* The identifier in the use if it is the case.  */
 125 };
 126
 127 /* Per-ssa version information (induction variable descriptions, etc.).  */
 128 struct version_info
 129 {
 130   tree name;            /* The ssa name.  */
 131   struct iv *iv;        /* Induction variable description.  */
 132   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 133                            an expression that is not an induction variable.  */
 134   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 135   unsigned inv_id;      /* Id of an invariant.  */
 136 };
 137
 138 /* Types of uses.  */
 139 enum use_type
 140 {
 141   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 142   USE_ADDRESS,          /* Use in an address.  */
 143   USE_COMPARE           /* Use is a compare.  */
 144 };
 145
 146 /* Cost of a computation.  */
 147 typedef struct
 148 {
 149   int cost;             /* The runtime cost.  */
 150   unsigned complexity;  /* The estimate of the complexity of the code for
 151                            the computation (in no concrete units --
 152                            complexity field should be larger for more
 153                            complex expressions and addressing modes).  */
 154 } comp_cost;
 155
 156 static const comp_cost no_cost = {0, 0};
 157 static const comp_cost infinite_cost = {INFTY, INFTY};
 158
 159 /* The candidate - cost pair.  */
 160 struct cost_pair
 161 {
 162   struct iv_cand *cand; /* The candidate.  */
 163   comp_cost cost;       /* The cost.  */
 164   bitmap depends_on;    /* The list of invariants that have to be
 165                            preserved.  */
 166   tree value;           /* For final value elimination, the expression for
 167                            the final value of the iv.  For iv elimination,
 168                            the new bound to compare with.  */
 169   enum tree_code comp;  /* For iv elimination, the comparison.  */
 170   int inv_expr_id;      /* Loop invariant expression id.  */
 171 };
 172
 173 /* Use.  */
 174 struct iv_use
 175 {
 176   unsigned id;          /* The id of the use.  */
 177   enum use_type type;   /* Type of the use.  */
 178   struct iv *iv;        /* The induction variable it is based on.  */
 179   gimple stmt;          /* Statement in that it occurs.  */
 180   tree *op_p;           /* The place where it occurs.  */
 181   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 182                            important ones.  */
 183
 184   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 185   struct cost_pair *cost_map;
 186                         /* The costs wrto the iv candidates.  */
 187
 188   struct iv_cand *selected;
 189                         /* The selected candidate.  */
 190 };
 191
 192 /* The position where the iv is computed.  */
 193 enum iv_position
 194 {
 195   IP_NORMAL,            /* At the end, just before the exit condition.  */
 196   IP_END,               /* At the end of the latch block.  */
 197   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 198   IP_AFTER_USE,         /* Immediately after a specific use.  */
 199   IP_ORIGINAL           /* The original biv.  */
 200 };
 201
 202 /* The induction variable candidate.  */
 203 struct iv_cand
 204 {
 205   unsigned id;          /* The number of the candidate.  */
 206   bool important;       /* Whether this is an "important" candidate, i.e. such
 207                            that it should be considered by all uses.  */
 208   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 209   gimple incremented_at;/* For original biv, the statement where it is
 210                            incremented.  */
 211   tree var_before;      /* The variable used for it before increment.  */
 212   tree var_after;       /* The variable used for it after increment.  */
 213   struct iv *iv;        /* The value of the candidate.  NULL for
 214                            "pseudocandidate" used to indicate the possibility
 215                            to replace the final value of an iv by direct
 216                            computation of the value.  */
 217   unsigned cost;        /* Cost of the candidate.  */
 218   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 219   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 220                               where it is incremented.  */
 221   bitmap depends_on;    /* The list of invariants that are used in step of the
 222                            biv.  */
 223 };
 224
 225 /* Loop invariant expression hashtable entry.  */
 226 struct iv_inv_expr_ent
 227 {
 228   tree expr;
 229   int id;
 230   hashval_t hash;
 231 };
 232
 233 /* The data used by the induction variable optimizations.  */
 234
 235 typedef struct iv_use *iv_use_p;
 236
 237 typedef struct iv_cand *iv_cand_p;
 238
 239 /* Hashtable helpers.  */
 240
 241 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 242 {
 243   typedef iv_inv_expr_ent value_type;
 244   typedef iv_inv_expr_ent compare_type;
 245   static inline hashval_t hash (const value_type *);
 246   static inline bool equal (const value_type *, const compare_type *);
 247 };
 248
 249 /* Hash function for loop invariant expressions.  */
 250
 251 inline hashval_t
 252 iv_inv_expr_hasher::hash (const value_type *expr)
 253 {
 254   return expr->hash;
 255 }
 256
 257 /* Hash table equality function for expressions.  */
 258
 259 inline bool
 260 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 261 {
 262   return expr1->hash == expr2->hash
 263          && operand_equal_p (expr1->expr, expr2->expr, 0);
 264 }
 265
 266 struct ivopts_data
 267 {
 268   /* The currently optimized loop.  */
 269   struct loop *current_loop;
 270
 271   /* Numbers of iterations for all exits of the current loop.  */
 272   struct pointer_map_t *niters;
 273
 274   /* Number of registers used in it.  */
 275   unsigned regs_used;
 276
 277   /* The size of version_info array allocated.  */
 278   unsigned version_info_size;
 279
 280   /* The array of information for the ssa names.  */
 281   struct version_info *version_info;
 282
 283   /* The hashtable of loop invariant expressions created
 284      by ivopt.  */
 285   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 286
 287   /* Loop invariant expression id.  */
 288   int inv_expr_id;
 289
 290   /* The bitmap of indices in version_info whose value was changed.  */
 291   bitmap relevant;
 292
 293   /* The uses of induction variables.  */
 294   vec<iv_use_p> iv_uses;
 295
 296   /* The candidates.  */
 297   vec<iv_cand_p> iv_candidates;
 298
 299   /* A bitmap of important candidates.  */
 300   bitmap important_candidates;
 301
 302   /* The maximum invariant id.  */
 303   unsigned max_inv_id;
 304
 305   /* Whether to consider just related and important candidates when replacing a
 306      use.  */
 307   bool consider_all_candidates;
 308
 309   /* Are we optimizing for speed?  */
 310   bool speed;
 311
 312   /* Whether the loop body includes any function calls.  */
 313   bool body_includes_call;
 314
 315   /* Whether the loop body can only be exited via single exit.  */
 316   bool loop_single_exit_p;
 317 };
 318
 319 /* An assignment of iv candidates to uses.  */
 320
 321 struct iv_ca
 322 {
 323   /* The number of uses covered by the assignment.  */
 324   unsigned upto;
 325
 326   /* Number of uses that cannot be expressed by the candidates in the set.  */
 327   unsigned bad_uses;
 328
 329   /* Candidate assigned to a use, together with the related costs.  */
 330   struct cost_pair **cand_for_use;
 331
 332   /* Number of times each candidate is used.  */
 333   unsigned *n_cand_uses;
 334
 335   /* The candidates used.  */
 336   bitmap cands;
 337
 338   /* The number of candidates in the set.  */
 339   unsigned n_cands;
 340
 341   /* Total number of registers needed.  */
 342   unsigned n_regs;
 343
 344   /* Total cost of expressing uses.  */
 345   comp_cost cand_use_cost;
 346
 347   /* Total cost of candidates.  */
 348   unsigned cand_cost;
 349
 350   /* Number of times each invariant is used.  */
 351   unsigned *n_invariant_uses;
 352
 353   /* The array holding the number of uses of each loop
 354      invariant expressions created by ivopt.  */
 355   unsigned *used_inv_expr;
 356
 357   /* The number of created loop invariants.  */
 358   unsigned num_used_inv_expr;
 359
 360   /* Total cost of the assignment.  */
 361   comp_cost cost;
 362 };
 363
 364 /* Difference of two iv candidate assignments.  */
 365
 366 struct iv_ca_delta
 367 {
 368   /* Changed use.  */
 369   struct iv_use *use;
 370
 371   /* An old assignment (for rollback purposes).  */
 372   struct cost_pair *old_cp;
 373
 374   /* A new assignment.  */
 375   struct cost_pair *new_cp;
 376
 377   /* Next change in the list.  */
 378   struct iv_ca_delta *next_change;
 379 };
 380
 381 /* Bound on number of candidates below that all candidates are considered.  */
 382
 383 #define CONSIDER_ALL_CANDIDATES_BOUND \
 384   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 385
 386 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 387    optimizing such a loop would help, and it would take ages).  */
 388
 389 #define MAX_CONSIDERED_USES \
 390   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 391
 392 /* If there are at most this number of ivs in the set, try removing unnecessary
 393    ivs from the set always.  */
 394
 395 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 396   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 397
 398 /* The list of trees for that the decl_rtl field must be reset is stored
 399    here.  */
 400
 401 static vec<tree> decl_rtl_to_reset;
 402
 403 static comp_cost force_expr_to_var_cost (tree, bool);
 404
 405 /* Number of uses recorded in DATA.  */
 406
 407 static inline unsigned
 408 n_iv_uses (struct ivopts_data *data)
 409 {
 410   return data->iv_uses.length ();
 411 }
 412
 413 /* Ith use recorded in DATA.  */
 414
 415 static inline struct iv_use *
 416 iv_use (struct ivopts_data *data, unsigned i)
 417 {
 418   return data->iv_uses[i];
 419 }
 420
 421 /* Number of candidates recorded in DATA.  */
 422
 423 static inline unsigned
 424 n_iv_cands (struct ivopts_data *data)
 425 {
 426   return data->iv_candidates.length ();
 427 }
 428
 429 /* Ith candidate recorded in DATA.  */
 430
 431 static inline struct iv_cand *
 432 iv_cand (struct ivopts_data *data, unsigned i)
 433 {
 434   return data->iv_candidates[i];
 435 }
 436
 437 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 438
 439 edge
 440 single_dom_exit (struct loop *loop)
 441 {
 442   edge exit = single_exit (loop);
 443
 444   if (!exit)
 445     return NULL;
 446
 447   if (!just_once_each_iteration_p (loop, exit->src))
 448     return NULL;
 449
 450   return exit;
 451 }
 452
 453 /* Dumps information about the induction variable IV to FILE.  */
 454
 455 void
 456 dump_iv (FILE *file, struct iv *iv)
 457 {
 458   if (iv->ssa_name)
 459     {
 460       fprintf (file, "ssa name ");
 461       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 462       fprintf (file, "\n");
 463     }
 464
 465   fprintf (file, "  type ");
 466   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 467   fprintf (file, "\n");
 468
 469   if (iv->step)
 470     {
 471       fprintf (file, "  base ");
 472       print_generic_expr (file, iv->base, TDF_SLIM);
 473       fprintf (file, "\n");
 474
 475       fprintf (file, "  step ");
 476       print_generic_expr (file, iv->step, TDF_SLIM);
 477       fprintf (file, "\n");
 478     }
 479   else
 480     {
 481       fprintf (file, "  invariant ");
 482       print_generic_expr (file, iv->base, TDF_SLIM);
 483       fprintf (file, "\n");
 484     }
 485
 486   if (iv->base_object)
 487     {
 488       fprintf (file, "  base object ");
 489       print_generic_expr (file, iv->base_object, TDF_SLIM);
 490       fprintf (file, "\n");
 491     }
 492
 493   if (iv->biv_p)
 494     fprintf (file, "  is a biv\n");
 495 }
 496
 497 /* Dumps information about the USE to FILE.  */
 498
 499 void
 500 dump_use (FILE *file, struct iv_use *use)
 501 {
 502   fprintf (file, "use %d\n", use->id);
 503
 504   switch (use->type)
 505     {
 506     case USE_NONLINEAR_EXPR:
 507       fprintf (file, "  generic\n");
 508       break;
 509
 510     case USE_ADDRESS:
 511       fprintf (file, "  address\n");
 512       break;
 513
 514     case USE_COMPARE:
 515       fprintf (file, "  compare\n");
 516       break;
 517
 518     default:
 519       gcc_unreachable ();
 520     }
 521
 522   fprintf (file, "  in statement ");
 523   print_gimple_stmt (file, use->stmt, 0, 0);
 524   fprintf (file, "\n");
 525
 526   fprintf (file, "  at position ");
 527   if (use->op_p)
 528     print_generic_expr (file, *use->op_p, TDF_SLIM);
 529   fprintf (file, "\n");
 530
 531   dump_iv (file, use->iv);
 532
 533   if (use->related_cands)
 534     {
 535       fprintf (file, "  related candidates ");
 536       dump_bitmap (file, use->related_cands);
 537     }
 538 }
 539
 540 /* Dumps information about the uses to FILE.  */
 541
 542 void
 543 dump_uses (FILE *file, struct ivopts_data *data)
 544 {
 545   unsigned i;
 546   struct iv_use *use;
 547
 548   for (i = 0; i < n_iv_uses (data); i++)
 549     {
 550       use = iv_use (data, i);
 551
 552       dump_use (file, use);
 553       fprintf (file, "\n");
 554     }
 555 }
 556
 557 /* Dumps information about induction variable candidate CAND to FILE.  */
 558
 559 void
 560 dump_cand (FILE *file, struct iv_cand *cand)
 561 {
 562   struct iv *iv = cand->iv;
 563
 564   fprintf (file, "candidate %d%s\n",
 565            cand->id, cand->important ? " (important)" : "");
 566
 567   if (cand->depends_on)
 568     {
 569       fprintf (file, "  depends on ");
 570       dump_bitmap (file, cand->depends_on);
 571     }
 572
 573   if (!iv)
 574     {
 575       fprintf (file, "  final value replacement\n");
 576       return;
 577     }
 578
 579   if (cand->var_before)
 580     {
 581       fprintf (file, "  var_before ");
 582       print_generic_expr (file, cand->var_before, TDF_SLIM);
 583       fprintf (file, "\n");
 584     }
 585   if (cand->var_after)
 586     {
 587       fprintf (file, "  var_after ");
 588       print_generic_expr (file, cand->var_after, TDF_SLIM);
 589       fprintf (file, "\n");
 590     }
 591
 592   switch (cand->pos)
 593     {
 594     case IP_NORMAL:
 595       fprintf (file, "  incremented before exit test\n");
 596       break;
 597
 598     case IP_BEFORE_USE:
 599       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 600       break;
 601
 602     case IP_AFTER_USE:
 603       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 604       break;
 605
 606     case IP_END:
 607       fprintf (file, "  incremented at end\n");
 608       break;
 609
 610     case IP_ORIGINAL:
 611       fprintf (file, "  original biv\n");
 612       break;
 613     }
 614
 615   dump_iv (file, iv);
 616 }
 617
 618 /* Returns the info for ssa version VER.  */
 619
 620 static inline struct version_info *
 621 ver_info (struct ivopts_data *data, unsigned ver)
 622 {
 623   return data->version_info + ver;
 624 }
 625
 626 /* Returns the info for ssa name NAME.  */
 627
 628 static inline struct version_info *
 629 name_info (struct ivopts_data *data, tree name)
 630 {
 631   return ver_info (data, SSA_NAME_VERSION (name));
 632 }
 633
 634 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 635    emitted in LOOP.  */
 636
 637 static bool
 638 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 639 {
 640   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 641
 642   gcc_assert (bb);
 643
 644   if (sbb == loop->latch)
 645     return true;
 646
 647   if (sbb != bb)
 648     return false;
 649
 650   return stmt == last_stmt (bb);
 651 }
 652
 653 /* Returns true if STMT if after the place where the original induction
 654    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 655    if the positions are identical.  */
 656
 657 static bool
 658 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 659 {
 660   basic_block cand_bb = gimple_bb (cand->incremented_at);
 661   basic_block stmt_bb = gimple_bb (stmt);
 662
 663   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 664     return false;
 665
 666   if (stmt_bb != cand_bb)
 667     return true;
 668
 669   if (true_if_equal
 670       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 671     return true;
 672   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 673 }
 674
 675 /* Returns true if STMT if after the place where the induction variable
 676    CAND is incremented in LOOP.  */
 677
 678 static bool
 679 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 680 {
 681   switch (cand->pos)
 682     {
 683     case IP_END:
 684       return false;
 685
 686     case IP_NORMAL:
 687       return stmt_after_ip_normal_pos (loop, stmt);
 688
 689     case IP_ORIGINAL:
 690     case IP_AFTER_USE:
 691       return stmt_after_inc_pos (cand, stmt, false);
 692
 693     case IP_BEFORE_USE:
 694       return stmt_after_inc_pos (cand, stmt, true);
 695
 696     default:
 697       gcc_unreachable ();
 698     }
 699 }
 700
 701 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 702
 703 static bool
 704 abnormal_ssa_name_p (tree exp)
 705 {
 706   if (!exp)
 707     return false;
 708
 709   if (TREE_CODE (exp) != SSA_NAME)
 710     return false;
 711
 712   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 713 }
 714
 715 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 716    abnormal phi node.  Callback for for_each_index.  */
 717
 718 static bool
 719 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 720                                   void *data ATTRIBUTE_UNUSED)
 721 {
 722   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 723     {
 724       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 725         return false;
 726       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 727         return false;
 728     }
 729
 730   return !abnormal_ssa_name_p (*index);
 731 }
 732
 733 /* Returns true if EXPR contains a ssa name that occurs in an
 734    abnormal phi node.  */
 735
 736 bool
 737 contains_abnormal_ssa_name_p (tree expr)
 738 {
 739   enum tree_code code;
 740   enum tree_code_class codeclass;
 741
 742   if (!expr)
 743     return false;
 744
 745   code = TREE_CODE (expr);
 746   codeclass = TREE_CODE_CLASS (code);
 747
 748   if (code == SSA_NAME)
 749     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 750
 751   if (code == INTEGER_CST
 752       || is_gimple_min_invariant (expr))
 753     return false;
 754
 755   if (code == ADDR_EXPR)
 756     return !for_each_index (&TREE_OPERAND (expr, 0),
 757                             idx_contains_abnormal_ssa_name_p,
 758                             NULL);
 759
 760   if (code == COND_EXPR)
 761     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 762       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 763       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 764
 765   switch (codeclass)
 766     {
 767     case tcc_binary:
 768     case tcc_comparison:
 769       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 770         return true;
 771
 772       /* Fallthru.  */
 773     case tcc_unary:
 774       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 775         return true;
 776
 777       break;
 778
 779     default:
 780       gcc_unreachable ();
 781     }
 782
 783   return false;
 784 }
 785
 786 /*  Returns the structure describing number of iterations determined from
 787     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 788
 789 static struct tree_niter_desc *
 790 niter_for_exit (struct ivopts_data *data, edge exit)
 791 {
 792   struct tree_niter_desc *desc;
 793   void **slot;
 794
 795   if (!data->niters)
 796     {
 797       data->niters = pointer_map_create ();
 798       slot = NULL;
 799     }
 800   else
 801     slot = pointer_map_contains (data->niters, exit);
 802
 803   if (!slot)
 804     {
 805       /* Try to determine number of iterations.  We cannot safely work with ssa
 806          names that appear in phi nodes on abnormal edges, so that we do not
 807          create overlapping life ranges for them (PR 27283).  */
 808       desc = XNEW (struct tree_niter_desc);
 809       if (!number_of_iterations_exit (data->current_loop,
 810                                       exit, desc, true)
 811           || contains_abnormal_ssa_name_p (desc->niter))
 812         {
 813           XDELETE (desc);
 814           desc = NULL;
 815         }
 816       slot = pointer_map_insert (data->niters, exit);
 817       *slot = desc;
 818     }
 819   else
 820     desc = (struct tree_niter_desc *) *slot;
 821
 822   return desc;
 823 }
 824
 825 /* Returns the structure describing number of iterations determined from
 826    single dominating exit of DATA->current_loop, or NULL if something
 827    goes wrong.  */
 828
 829 static struct tree_niter_desc *
 830 niter_for_single_dom_exit (struct ivopts_data *data)
 831 {
 832   edge exit = single_dom_exit (data->current_loop);
 833
 834   if (!exit)
 835     return NULL;
 836
 837   return niter_for_exit (data, exit);
 838 }
 839
 840 /* Initializes data structures used by the iv optimization pass, stored
 841    in DATA.  */
 842
 843 static void
 844 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 845 {
 846   data->version_info_size = 2 * num_ssa_names;
 847   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 848   data->relevant = BITMAP_ALLOC (NULL);
 849   data->important_candidates = BITMAP_ALLOC (NULL);
 850   data->max_inv_id = 0;
 851   data->niters = NULL;
 852   data->iv_uses.create (20);
 853   data->iv_candidates.create (20);
 854   data->inv_expr_tab.create (10);
 855   data->inv_expr_id = 0;
 856   decl_rtl_to_reset.create (20);
 857 }
 858
 859 /* Returns a memory object to that EXPR points.  In case we are able to
 860    determine that it does not point to any such object, NULL is returned.  */
 861
 862 static tree
 863 determine_base_object (tree expr)
 864 {
 865   enum tree_code code = TREE_CODE (expr);
 866   tree base, obj;
 867
 868   /* If this is a pointer casted to any type, we need to determine
 869      the base object for the pointer; so handle conversions before
 870      throwing away non-pointer expressions.  */
 871   if (CONVERT_EXPR_P (expr))
 872     return determine_base_object (TREE_OPERAND (expr, 0));
 873
 874   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 875     return NULL_TREE;
 876
 877   switch (code)
 878     {
 879     case INTEGER_CST:
 880       return NULL_TREE;
 881
 882     case ADDR_EXPR:
 883       obj = TREE_OPERAND (expr, 0);
 884       base = get_base_address (obj);
 885
 886       if (!base)
 887         return expr;
 888
 889       if (TREE_CODE (base) == MEM_REF)
 890         return determine_base_object (TREE_OPERAND (base, 0));
 891
 892       return fold_convert (ptr_type_node,
 893                            build_fold_addr_expr (base));
 894
 895     case POINTER_PLUS_EXPR:
 896       return determine_base_object (TREE_OPERAND (expr, 0));
 897
 898     case PLUS_EXPR:
 899     case MINUS_EXPR:
 900       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 901       gcc_unreachable ();
 902
 903     default:
 904       return fold_convert (ptr_type_node, expr);
 905     }
 906 }
 907
 908 /* Allocates an induction variable with given initial value BASE and step STEP
 909    for loop LOOP.  */
 910
 911 static struct iv *
 912 alloc_iv (tree base, tree step)
 913 {
 914   struct iv *iv = XCNEW (struct iv);
 915   gcc_assert (step != NULL_TREE);
 916
 917   iv->base = base;
 918   iv->base_object = determine_base_object (base);
 919   iv->step = step;
 920   iv->biv_p = false;
 921   iv->have_use_for = false;
 922   iv->use_id = 0;
 923   iv->ssa_name = NULL_TREE;
 924
 925   return iv;
 926 }
 927
 928 /* Sets STEP and BASE for induction variable IV.  */
 929
 930 static void
 931 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 932 {
 933   struct version_info *info = name_info (data, iv);
 934
 935   gcc_assert (!info->iv);
 936
 937   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 938   info->iv = alloc_iv (base, step);
 939   info->iv->ssa_name = iv;
 940 }
 941
 942 /* Finds induction variable declaration for VAR.  */
 943
 944 static struct iv *
 945 get_iv (struct ivopts_data *data, tree var)
 946 {
 947   basic_block bb;
 948   tree type = TREE_TYPE (var);
 949
 950   if (!POINTER_TYPE_P (type)
 951       && !INTEGRAL_TYPE_P (type))
 952     return NULL;
 953
 954   if (!name_info (data, var)->iv)
 955     {
 956       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 957
 958       if (!bb
 959           || !flow_bb_inside_loop_p (data->current_loop, bb))
 960         set_iv (data, var, var, build_int_cst (type, 0));
 961     }
 962
 963   return name_info (data, var)->iv;
 964 }
 965
 966 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 967    not define a simple affine biv with nonzero step.  */
 968
 969 static tree
 970 determine_biv_step (gimple phi)
 971 {
 972   struct loop *loop = gimple_bb (phi)->loop_father;
 973   tree name = PHI_RESULT (phi);
 974   affine_iv iv;
 975
 976   if (virtual_operand_p (name))
 977     return NULL_TREE;
 978
 979   if (!simple_iv (loop, loop, name, &iv, true))
 980     return NULL_TREE;
 981
 982   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 983 }
 984
 985 /* Finds basic ivs.  */
 986
 987 static bool
 988 find_bivs (struct ivopts_data *data)
 989 {
 990   gimple phi;
 991   tree step, type, base;
 992   bool found = false;
 993   struct loop *loop = data->current_loop;
 994   gimple_stmt_iterator psi;
 995
 996   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 997     {
 998       phi = gsi_stmt (psi);
 999
1000       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1001         continue;
1002
1003       step = determine_biv_step (phi);
1004       if (!step)
1005         continue;
1006
1007       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1008       base = expand_simple_operations (base);
1009       if (contains_abnormal_ssa_name_p (base)
1010           || contains_abnormal_ssa_name_p (step))
1011         continue;
1012
1013       type = TREE_TYPE (PHI_RESULT (phi));
1014       base = fold_convert (type, base);
1015       if (step)
1016         {
1017           if (POINTER_TYPE_P (type))
1018             step = convert_to_ptrofftype (step);
1019           else
1020             step = fold_convert (type, step);
1021         }
1022
1023       set_iv (data, PHI_RESULT (phi), base, step);
1024       found = true;
1025     }
1026
1027   return found;
1028 }
1029
1030 /* Marks basic ivs.  */
1031
1032 static void
1033 mark_bivs (struct ivopts_data *data)
1034 {
1035   gimple phi;
1036   tree var;
1037   struct iv *iv, *incr_iv;
1038   struct loop *loop = data->current_loop;
1039   basic_block incr_bb;
1040   gimple_stmt_iterator psi;
1041
1042   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1043     {
1044       phi = gsi_stmt (psi);
1045
1046       iv = get_iv (data, PHI_RESULT (phi));
1047       if (!iv)
1048         continue;
1049
1050       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1051       incr_iv = get_iv (data, var);
1052       if (!incr_iv)
1053         continue;
1054
1055       /* If the increment is in the subloop, ignore it.  */
1056       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1057       if (incr_bb->loop_father != data->current_loop
1058           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1059         continue;
1060
1061       iv->biv_p = true;
1062       incr_iv->biv_p = true;
1063     }
1064 }
1065
1066 /* Checks whether STMT defines a linear induction variable and stores its
1067    parameters to IV.  */
1068
1069 static bool
1070 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1071 {
1072   tree lhs;
1073   struct loop *loop = data->current_loop;
1074
1075   iv->base = NULL_TREE;
1076   iv->step = NULL_TREE;
1077
1078   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1079     return false;
1080
1081   lhs = gimple_assign_lhs (stmt);
1082   if (TREE_CODE (lhs) != SSA_NAME)
1083     return false;
1084
1085   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1086     return false;
1087   iv->base = expand_simple_operations (iv->base);
1088
1089   if (contains_abnormal_ssa_name_p (iv->base)
1090       || contains_abnormal_ssa_name_p (iv->step))
1091     return false;
1092
1093   /* If STMT could throw, then do not consider STMT as defining a GIV.
1094      While this will suppress optimizations, we can not safely delete this
1095      GIV and associated statements, even if it appears it is not used.  */
1096   if (stmt_could_throw_p (stmt))
1097     return false;
1098
1099   return true;
1100 }
1101
1102 /* Finds general ivs in statement STMT.  */
1103
1104 static void
1105 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1106 {
1107   affine_iv iv;
1108
1109   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1110     return;
1111
1112   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1113 }
1114
1115 /* Finds general ivs in basic block BB.  */
1116
1117 static void
1118 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1119 {
1120   gimple_stmt_iterator bsi;
1121
1122   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1123     find_givs_in_stmt (data, gsi_stmt (bsi));
1124 }
1125
1126 /* Finds general ivs.  */
1127
1128 static void
1129 find_givs (struct ivopts_data *data)
1130 {
1131   struct loop *loop = data->current_loop;
1132   basic_block *body = get_loop_body_in_dom_order (loop);
1133   unsigned i;
1134
1135   for (i = 0; i < loop->num_nodes; i++)
1136     find_givs_in_bb (data, body[i]);
1137   free (body);
1138 }
1139
1140 /* For each ssa name defined in LOOP determines whether it is an induction
1141    variable and if so, its initial value and step.  */
1142
1143 static bool
1144 find_induction_variables (struct ivopts_data *data)
1145 {
1146   unsigned i;
1147   bitmap_iterator bi;
1148
1149   if (!find_bivs (data))
1150     return false;
1151
1152   find_givs (data);
1153   mark_bivs (data);
1154
1155   if (dump_file && (dump_flags & TDF_DETAILS))
1156     {
1157       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1158
1159       if (niter)
1160         {
1161           fprintf (dump_file, "  number of iterations ");
1162           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1163           if (!integer_zerop (niter->may_be_zero))
1164             {
1165               fprintf (dump_file, "; zero if ");
1166               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1167             }
1168           fprintf (dump_file, "\n\n");
1169         };
1170
1171       fprintf (dump_file, "Induction variables:\n\n");
1172
1173       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1174         {
1175           if (ver_info (data, i)->iv)
1176             dump_iv (dump_file, ver_info (data, i)->iv);
1177         }
1178     }
1179
1180   return true;
1181 }
1182
1183 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1184
1185 static struct iv_use *
1186 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1187             gimple stmt, enum use_type use_type)
1188 {
1189   struct iv_use *use = XCNEW (struct iv_use);
1190
1191   use->id = n_iv_uses (data);
1192   use->type = use_type;
1193   use->iv = iv;
1194   use->stmt = stmt;
1195   use->op_p = use_p;
1196   use->related_cands = BITMAP_ALLOC (NULL);
1197
1198   /* To avoid showing ssa name in the dumps, if it was not reset by the
1199      caller.  */
1200   iv->ssa_name = NULL_TREE;
1201
1202   if (dump_file && (dump_flags & TDF_DETAILS))
1203     dump_use (dump_file, use);
1204
1205   data->iv_uses.safe_push (use);
1206
1207   return use;
1208 }
1209
1210 /* Checks whether OP is a loop-level invariant and if so, records it.
1211    NONLINEAR_USE is true if the invariant is used in a way we do not
1212    handle specially.  */
1213
1214 static void
1215 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1216 {
1217   basic_block bb;
1218   struct version_info *info;
1219
1220   if (TREE_CODE (op) != SSA_NAME
1221       || virtual_operand_p (op))
1222     return;
1223
1224   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1225   if (bb
1226       && flow_bb_inside_loop_p (data->current_loop, bb))
1227     return;
1228
1229   info = name_info (data, op);
1230   info->name = op;
1231   info->has_nonlin_use |= nonlinear_use;
1232   if (!info->inv_id)
1233     info->inv_id = ++data->max_inv_id;
1234   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1235 }
1236
1237 /* Checks whether the use OP is interesting and if so, records it.  */
1238
1239 static struct iv_use *
1240 find_interesting_uses_op (struct ivopts_data *data, tree op)
1241 {
1242   struct iv *iv;
1243   struct iv *civ;
1244   gimple stmt;
1245   struct iv_use *use;
1246
1247   if (TREE_CODE (op) != SSA_NAME)
1248     return NULL;
1249
1250   iv = get_iv (data, op);
1251   if (!iv)
1252     return NULL;
1253
1254   if (iv->have_use_for)
1255     {
1256       use = iv_use (data, iv->use_id);
1257
1258       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1259       return use;
1260     }
1261
1262   if (integer_zerop (iv->step))
1263     {
1264       record_invariant (data, op, true);
1265       return NULL;
1266     }
1267   iv->have_use_for = true;
1268
1269   civ = XNEW (struct iv);
1270   *civ = *iv;
1271
1272   stmt = SSA_NAME_DEF_STMT (op);
1273   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1274               || is_gimple_assign (stmt));
1275
1276   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1277   iv->use_id = use->id;
1278
1279   return use;
1280 }
1281
1282 /* Given a condition in statement STMT, checks whether it is a compare
1283    of an induction variable and an invariant.  If this is the case,
1284    CONTROL_VAR is set to location of the iv, BOUND to the location of
1285    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1286    induction variable descriptions, and true is returned.  If this is not
1287    the case, CONTROL_VAR and BOUND are set to the arguments of the
1288    condition and false is returned.  */
1289
1290 static bool
1291 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1292                        tree **control_var, tree **bound,
1293                        struct iv **iv_var, struct iv **iv_bound)
1294 {
1295   /* The objects returned when COND has constant operands.  */
1296   static struct iv const_iv;
1297   static tree zero;
1298   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1299   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1300   bool ret = false;
1301
1302   if (gimple_code (stmt) == GIMPLE_COND)
1303     {
1304       op0 = gimple_cond_lhs_ptr (stmt);
1305       op1 = gimple_cond_rhs_ptr (stmt);
1306     }
1307   else
1308     {
1309       op0 = gimple_assign_rhs1_ptr (stmt);
1310       op1 = gimple_assign_rhs2_ptr (stmt);
1311     }
1312
1313   zero = integer_zero_node;
1314   const_iv.step = integer_zero_node;
1315
1316   if (TREE_CODE (*op0) == SSA_NAME)
1317     iv0 = get_iv (data, *op0);
1318   if (TREE_CODE (*op1) == SSA_NAME)
1319     iv1 = get_iv (data, *op1);
1320
1321   /* Exactly one of the compared values must be an iv, and the other one must
1322      be an invariant.  */
1323   if (!iv0 || !iv1)
1324     goto end;
1325
1326   if (integer_zerop (iv0->step))
1327     {
1328       /* Control variable may be on the other side.  */
1329       tmp_op = op0; op0 = op1; op1 = tmp_op;
1330       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1331     }
1332   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1333
1334 end:
1335   if (control_var)
1336     *control_var = op0;;
1337   if (iv_var)
1338     *iv_var = iv0;;
1339   if (bound)
1340     *bound = op1;
1341   if (iv_bound)
1342     *iv_bound = iv1;
1343
1344   return ret;
1345 }
1346
1347 /* Checks whether the condition in STMT is interesting and if so,
1348    records it.  */
1349
1350 static void
1351 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1352 {
1353   tree *var_p, *bound_p;
1354   struct iv *var_iv, *civ;
1355
1356   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1357     {
1358       find_interesting_uses_op (data, *var_p);
1359       find_interesting_uses_op (data, *bound_p);
1360       return;
1361     }
1362
1363   civ = XNEW (struct iv);
1364   *civ = *var_iv;
1365   record_use (data, NULL, civ, stmt, USE_COMPARE);
1366 }
1367
1368 /* Returns the outermost loop EXPR is obviously invariant in
1369    relative to the loop LOOP, i.e. if all its operands are defined
1370    outside of the returned loop.  Returns NULL if EXPR is not
1371    even obviously invariant in LOOP.  */
1372
1373 struct loop *
1374 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1375 {
1376   basic_block def_bb;
1377   unsigned i, len;
1378
1379   if (is_gimple_min_invariant (expr))
1380     return current_loops->tree_root;
1381
1382   if (TREE_CODE (expr) == SSA_NAME)
1383     {
1384       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1385       if (def_bb)
1386         {
1387           if (flow_bb_inside_loop_p (loop, def_bb))
1388             return NULL;
1389           return superloop_at_depth (loop,
1390                                      loop_depth (def_bb->loop_father) + 1);
1391         }
1392
1393       return current_loops->tree_root;
1394     }
1395
1396   if (!EXPR_P (expr))
1397     return NULL;
1398
1399   unsigned maxdepth = 0;
1400   len = TREE_OPERAND_LENGTH (expr);
1401   for (i = 0; i < len; i++)
1402     {
1403       struct loop *ivloop;
1404       if (!TREE_OPERAND (expr, i))
1405         continue;
1406
1407       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1408       if (!ivloop)
1409         return NULL;
1410       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1411     }
1412
1413   return superloop_at_depth (loop, maxdepth);
1414 }
1415
1416 /* Returns true if expression EXPR is obviously invariant in LOOP,
1417    i.e. if all its operands are defined outside of the LOOP.  LOOP
1418    should not be the function body.  */
1419
1420 bool
1421 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1422 {
1423   basic_block def_bb;
1424   unsigned i, len;
1425
1426   gcc_assert (loop_depth (loop) > 0);
1427
1428   if (is_gimple_min_invariant (expr))
1429     return true;
1430
1431   if (TREE_CODE (expr) == SSA_NAME)
1432     {
1433       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1434       if (def_bb
1435           && flow_bb_inside_loop_p (loop, def_bb))
1436         return false;
1437
1438       return true;
1439     }
1440
1441   if (!EXPR_P (expr))
1442     return false;
1443
1444   len = TREE_OPERAND_LENGTH (expr);
1445   for (i = 0; i < len; i++)
1446     if (TREE_OPERAND (expr, i)
1447         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1448       return false;
1449
1450   return true;
1451 }
1452
1453 /* Cumulates the steps of indices into DATA and replaces their values with the
1454    initial ones.  Returns false when the value of the index cannot be determined.
1455    Callback for for_each_index.  */
1456
1457 struct ifs_ivopts_data
1458 {
1459   struct ivopts_data *ivopts_data;
1460   gimple stmt;
1461   tree step;
1462 };
1463
1464 static bool
1465 idx_find_step (tree base, tree *idx, void *data)
1466 {
1467   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1468   struct iv *iv;
1469   tree step, iv_base, iv_step, lbound, off;
1470   struct loop *loop = dta->ivopts_data->current_loop;
1471
1472   /* If base is a component ref, require that the offset of the reference
1473      be invariant.  */
1474   if (TREE_CODE (base) == COMPONENT_REF)
1475     {
1476       off = component_ref_field_offset (base);
1477       return expr_invariant_in_loop_p (loop, off);
1478     }
1479
1480   /* If base is array, first check whether we will be able to move the
1481      reference out of the loop (in order to take its address in strength
1482      reduction).  In order for this to work we need both lower bound
1483      and step to be loop invariants.  */
1484   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1485     {
1486       /* Moreover, for a range, the size needs to be invariant as well.  */
1487       if (TREE_CODE (base) == ARRAY_RANGE_REF
1488           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1489         return false;
1490
1491       step = array_ref_element_size (base);
1492       lbound = array_ref_low_bound (base);
1493
1494       if (!expr_invariant_in_loop_p (loop, step)
1495           || !expr_invariant_in_loop_p (loop, lbound))
1496         return false;
1497     }
1498
1499   if (TREE_CODE (*idx) != SSA_NAME)
1500     return true;
1501
1502   iv = get_iv (dta->ivopts_data, *idx);
1503   if (!iv)
1504     return false;
1505
1506   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1507           *&x[0], which is not folded and does not trigger the
1508           ARRAY_REF path below.  */
1509   *idx = iv->base;
1510
1511   if (integer_zerop (iv->step))
1512     return true;
1513
1514   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1515     {
1516       step = array_ref_element_size (base);
1517
1518       /* We only handle addresses whose step is an integer constant.  */
1519       if (TREE_CODE (step) != INTEGER_CST)
1520         return false;
1521     }
1522   else
1523     /* The step for pointer arithmetics already is 1 byte.  */
1524     step = size_one_node;
1525
1526   iv_base = iv->base;
1527   iv_step = iv->step;
1528   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1529                             sizetype, &iv_base, &iv_step, dta->stmt,
1530                             false))
1531     {
1532       /* The index might wrap.  */
1533       return false;
1534     }
1535
1536   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1537   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1538
1539   return true;
1540 }
1541
1542 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1543    object is passed to it in DATA.  */
1544
1545 static bool
1546 idx_record_use (tree base, tree *idx,
1547                 void *vdata)
1548 {
1549   struct ivopts_data *data = (struct ivopts_data *) vdata;
1550   find_interesting_uses_op (data, *idx);
1551   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1552     {
1553       find_interesting_uses_op (data, array_ref_element_size (base));
1554       find_interesting_uses_op (data, array_ref_low_bound (base));
1555     }
1556   return true;
1557 }
1558
1559 /* If we can prove that TOP = cst * BOT for some constant cst,
1560    store cst to MUL and return true.  Otherwise return false.
1561    The returned value is always sign-extended, regardless of the
1562    signedness of TOP and BOT.  */
1563
1564 static bool
1565 constant_multiple_of (tree top, tree bot, double_int *mul)
1566 {
1567   tree mby;
1568   enum tree_code code;
1569   double_int res, p0, p1;
1570   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1571
1572   STRIP_NOPS (top);
1573   STRIP_NOPS (bot);
1574
1575   if (operand_equal_p (top, bot, 0))
1576     {
1577       *mul = double_int_one;
1578       return true;
1579     }
1580
1581   code = TREE_CODE (top);
1582   switch (code)
1583     {
1584     case MULT_EXPR:
1585       mby = TREE_OPERAND (top, 1);
1586       if (TREE_CODE (mby) != INTEGER_CST)
1587         return false;
1588
1589       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1590         return false;
1591
1592       *mul = (res * tree_to_double_int (mby)).sext (precision);
1593       return true;
1594
1595     case PLUS_EXPR:
1596     case MINUS_EXPR:
1597       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1598           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1599         return false;
1600
1601       if (code == MINUS_EXPR)
1602         p1 = -p1;
1603       *mul = (p0 + p1).sext (precision);
1604       return true;
1605
1606     case INTEGER_CST:
1607       if (TREE_CODE (bot) != INTEGER_CST)
1608         return false;
1609
1610       p0 = tree_to_double_int (top).sext (precision);
1611       p1 = tree_to_double_int (bot).sext (precision);
1612       if (p1.is_zero ())
1613         return false;
1614       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1615       return res.is_zero ();
1616
1617     default:
1618       return false;
1619     }
1620 }
1621
1622 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1623
1624 static bool
1625 may_be_unaligned_p (tree ref, tree step)
1626 {
1627   tree base;
1628   tree base_type;
1629   HOST_WIDE_INT bitsize;
1630   HOST_WIDE_INT bitpos;
1631   tree toffset;
1632   enum machine_mode mode;
1633   int unsignedp, volatilep;
1634   unsigned base_align;
1635
1636   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1637      thus they are not misaligned.  */
1638   if (TREE_CODE (ref) == TARGET_MEM_REF)
1639     return false;
1640
1641   /* The test below is basically copy of what expr.c:normal_inner_ref
1642      does to check whether the object must be loaded by parts when
1643      STRICT_ALIGNMENT is true.  */
1644   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1645                               &unsignedp, &volatilep, true);
1646   base_type = TREE_TYPE (base);
1647   base_align = get_object_alignment (base);
1648   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1649
1650   if (mode != BLKmode)
1651     {
1652       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1653
1654       if (base_align < mode_align
1655           || (bitpos % mode_align) != 0
1656           || (bitpos % BITS_PER_UNIT) != 0)
1657         return true;
1658
1659       if (toffset
1660           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1661         return true;
1662
1663       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1664         return true;
1665     }
1666
1667   return false;
1668 }
1669
1670 /* Return true if EXPR may be non-addressable.   */
1671
1672 bool
1673 may_be_nonaddressable_p (tree expr)
1674 {
1675   switch (TREE_CODE (expr))
1676     {
1677     case TARGET_MEM_REF:
1678       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1679          target, thus they are always addressable.  */
1680       return false;
1681
1682     case COMPONENT_REF:
1683       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1684              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1685
1686     case VIEW_CONVERT_EXPR:
1687       /* This kind of view-conversions may wrap non-addressable objects
1688          and make them look addressable.  After some processing the
1689          non-addressability may be uncovered again, causing ADDR_EXPRs
1690          of inappropriate objects to be built.  */
1691       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1692           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1693         return true;
1694
1695       /* ... fall through ... */
1696
1697     case ARRAY_REF:
1698     case ARRAY_RANGE_REF:
1699       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1700
1701     CASE_CONVERT:
1702       return true;
1703
1704     default:
1705       break;
1706     }
1707
1708   return false;
1709 }
1710
1711 /* Finds addresses in *OP_P inside STMT.  */
1712
1713 static void
1714 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1715 {
1716   tree base = *op_p, step = size_zero_node;
1717   struct iv *civ;
1718   struct ifs_ivopts_data ifs_ivopts_data;
1719
1720   /* Do not play with volatile memory references.  A bit too conservative,
1721      perhaps, but safe.  */
1722   if (gimple_has_volatile_ops (stmt))
1723     goto fail;
1724
1725   /* Ignore bitfields for now.  Not really something terribly complicated
1726      to handle.  TODO.  */
1727   if (TREE_CODE (base) == BIT_FIELD_REF)
1728     goto fail;
1729
1730   base = unshare_expr (base);
1731
1732   if (TREE_CODE (base) == TARGET_MEM_REF)
1733     {
1734       tree type = build_pointer_type (TREE_TYPE (base));
1735       tree astep;
1736
1737       if (TMR_BASE (base)
1738           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1739         {
1740           civ = get_iv (data, TMR_BASE (base));
1741           if (!civ)
1742             goto fail;
1743
1744           TMR_BASE (base) = civ->base;
1745           step = civ->step;
1746         }
1747       if (TMR_INDEX2 (base)
1748           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1749         {
1750           civ = get_iv (data, TMR_INDEX2 (base));
1751           if (!civ)
1752             goto fail;
1753
1754           TMR_INDEX2 (base) = civ->base;
1755           step = civ->step;
1756         }
1757       if (TMR_INDEX (base)
1758           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1759         {
1760           civ = get_iv (data, TMR_INDEX (base));
1761           if (!civ)
1762             goto fail;
1763
1764           TMR_INDEX (base) = civ->base;
1765           astep = civ->step;
1766
1767           if (astep)
1768             {
1769               if (TMR_STEP (base))
1770                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1771
1772               step = fold_build2 (PLUS_EXPR, type, step, astep);
1773             }
1774         }
1775
1776       if (integer_zerop (step))
1777         goto fail;
1778       base = tree_mem_ref_addr (type, base);
1779     }
1780   else
1781     {
1782       ifs_ivopts_data.ivopts_data = data;
1783       ifs_ivopts_data.stmt = stmt;
1784       ifs_ivopts_data.step = size_zero_node;
1785       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1786           || integer_zerop (ifs_ivopts_data.step))
1787         goto fail;
1788       step = ifs_ivopts_data.step;
1789
1790       /* Check that the base expression is addressable.  This needs
1791          to be done after substituting bases of IVs into it.  */
1792       if (may_be_nonaddressable_p (base))
1793         goto fail;
1794
1795       /* Moreover, on strict alignment platforms, check that it is
1796          sufficiently aligned.  */
1797       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1798         goto fail;
1799
1800       base = build_fold_addr_expr (base);
1801
1802       /* Substituting bases of IVs into the base expression might
1803          have caused folding opportunities.  */
1804       if (TREE_CODE (base) == ADDR_EXPR)
1805         {
1806           tree *ref = &TREE_OPERAND (base, 0);
1807           while (handled_component_p (*ref))
1808             ref = &TREE_OPERAND (*ref, 0);
1809           if (TREE_CODE (*ref) == MEM_REF)
1810             {
1811               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1812                                       TREE_OPERAND (*ref, 0),
1813                                       TREE_OPERAND (*ref, 1));
1814               if (tem)
1815                 *ref = tem;
1816             }
1817         }
1818     }
1819
1820   civ = alloc_iv (base, step);
1821   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1822   return;
1823
1824 fail:
1825   for_each_index (op_p, idx_record_use, data);
1826 }
1827
1828 /* Finds and records invariants used in STMT.  */
1829
1830 static void
1831 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1832 {
1833   ssa_op_iter iter;
1834   use_operand_p use_p;
1835   tree op;
1836
1837   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1838     {
1839       op = USE_FROM_PTR (use_p);
1840       record_invariant (data, op, false);
1841     }
1842 }
1843
1844 /* Finds interesting uses of induction variables in the statement STMT.  */
1845
1846 static void
1847 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1848 {
1849   struct iv *iv;
1850   tree op, *lhs, *rhs;
1851   ssa_op_iter iter;
1852   use_operand_p use_p;
1853   enum tree_code code;
1854
1855   find_invariants_stmt (data, stmt);
1856
1857   if (gimple_code (stmt) == GIMPLE_COND)
1858     {
1859       find_interesting_uses_cond (data, stmt);
1860       return;
1861     }
1862
1863   if (is_gimple_assign (stmt))
1864     {
1865       lhs = gimple_assign_lhs_ptr (stmt);
1866       rhs = gimple_assign_rhs1_ptr (stmt);
1867
1868       if (TREE_CODE (*lhs) == SSA_NAME)
1869         {
1870           /* If the statement defines an induction variable, the uses are not
1871              interesting by themselves.  */
1872
1873           iv = get_iv (data, *lhs);
1874
1875           if (iv && !integer_zerop (iv->step))
1876             return;
1877         }
1878
1879       code = gimple_assign_rhs_code (stmt);
1880       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1881           && (REFERENCE_CLASS_P (*rhs)
1882               || is_gimple_val (*rhs)))
1883         {
1884           if (REFERENCE_CLASS_P (*rhs))
1885             find_interesting_uses_address (data, stmt, rhs);
1886           else
1887             find_interesting_uses_op (data, *rhs);
1888
1889           if (REFERENCE_CLASS_P (*lhs))
1890             find_interesting_uses_address (data, stmt, lhs);
1891           return;
1892         }
1893       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1894         {
1895           find_interesting_uses_cond (data, stmt);
1896           return;
1897         }
1898
1899       /* TODO -- we should also handle address uses of type
1900
1901          memory = call (whatever);
1902
1903          and
1904
1905          call (memory).  */
1906     }
1907
1908   if (gimple_code (stmt) == GIMPLE_PHI
1909       && gimple_bb (stmt) == data->current_loop->header)
1910     {
1911       iv = get_iv (data, PHI_RESULT (stmt));
1912
1913       if (iv && !integer_zerop (iv->step))
1914         return;
1915     }
1916
1917   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1918     {
1919       op = USE_FROM_PTR (use_p);
1920
1921       if (TREE_CODE (op) != SSA_NAME)
1922         continue;
1923
1924       iv = get_iv (data, op);
1925       if (!iv)
1926         continue;
1927
1928       find_interesting_uses_op (data, op);
1929     }
1930 }
1931
1932 /* Finds interesting uses of induction variables outside of loops
1933    on loop exit edge EXIT.  */
1934
1935 static void
1936 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1937 {
1938   gimple phi;
1939   gimple_stmt_iterator psi;
1940   tree def;
1941
1942   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1943     {
1944       phi = gsi_stmt (psi);
1945       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1946       if (!virtual_operand_p (def))
1947         find_interesting_uses_op (data, def);
1948     }
1949 }
1950
1951 /* Finds uses of the induction variables that are interesting.  */
1952
1953 static void
1954 find_interesting_uses (struct ivopts_data *data)
1955 {
1956   basic_block bb;
1957   gimple_stmt_iterator bsi;
1958   basic_block *body = get_loop_body (data->current_loop);
1959   unsigned i;
1960   struct version_info *info;
1961   edge e;
1962
1963   if (dump_file && (dump_flags & TDF_DETAILS))
1964     fprintf (dump_file, "Uses:\n\n");
1965
1966   for (i = 0; i < data->current_loop->num_nodes; i++)
1967     {
1968       edge_iterator ei;
1969       bb = body[i];
1970
1971       FOR_EACH_EDGE (e, ei, bb->succs)
1972         if (e->dest != EXIT_BLOCK_PTR
1973             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1974           find_interesting_uses_outside (data, e);
1975
1976       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1977         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1978       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1979         if (!is_gimple_debug (gsi_stmt (bsi)))
1980           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1981     }
1982
1983   if (dump_file && (dump_flags & TDF_DETAILS))
1984     {
1985       bitmap_iterator bi;
1986
1987       fprintf (dump_file, "\n");
1988
1989       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1990         {
1991           info = ver_info (data, i);
1992           if (info->inv_id)
1993             {
1994               fprintf (dump_file, "  ");
1995               print_generic_expr (dump_file, info->name, TDF_SLIM);
1996               fprintf (dump_file, " is invariant (%d)%s\n",
1997                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1998             }
1999         }
2000
2001       fprintf (dump_file, "\n");
2002     }
2003
2004   free (body);
2005 }
2006
2007 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2008    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2009    we are at the top-level of the processed address.  */
2010
2011 static tree
2012 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2013                 unsigned HOST_WIDE_INT *offset)
2014 {
2015   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2016   enum tree_code code;
2017   tree type, orig_type = TREE_TYPE (expr);
2018   unsigned HOST_WIDE_INT off0, off1, st;
2019   tree orig_expr = expr;
2020
2021   STRIP_NOPS (expr);
2022
2023   type = TREE_TYPE (expr);
2024   code = TREE_CODE (expr);
2025   *offset = 0;
2026
2027   switch (code)
2028     {
2029     case INTEGER_CST:
2030       if (!cst_and_fits_in_hwi (expr)
2031           || integer_zerop (expr))
2032         return orig_expr;
2033
2034       *offset = int_cst_value (expr);
2035       return build_int_cst (orig_type, 0);
2036
2037     case POINTER_PLUS_EXPR:
2038     case PLUS_EXPR:
2039     case MINUS_EXPR:
2040       op0 = TREE_OPERAND (expr, 0);
2041       op1 = TREE_OPERAND (expr, 1);
2042
2043       op0 = strip_offset_1 (op0, false, false, &off0);
2044       op1 = strip_offset_1 (op1, false, false, &off1);
2045
2046       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2047       if (op0 == TREE_OPERAND (expr, 0)
2048           && op1 == TREE_OPERAND (expr, 1))
2049         return orig_expr;
2050
2051       if (integer_zerop (op1))
2052         expr = op0;
2053       else if (integer_zerop (op0))
2054         {
2055           if (code == MINUS_EXPR)
2056             expr = fold_build1 (NEGATE_EXPR, type, op1);
2057           else
2058             expr = op1;
2059         }
2060       else
2061         expr = fold_build2 (code, type, op0, op1);
2062
2063       return fold_convert (orig_type, expr);
2064
2065     case MULT_EXPR:
2066       op1 = TREE_OPERAND (expr, 1);
2067       if (!cst_and_fits_in_hwi (op1))
2068         return orig_expr;
2069
2070       op0 = TREE_OPERAND (expr, 0);
2071       op0 = strip_offset_1 (op0, false, false, &off0);
2072       if (op0 == TREE_OPERAND (expr, 0))
2073         return orig_expr;
2074
2075       *offset = off0 * int_cst_value (op1);
2076       if (integer_zerop (op0))
2077         expr = op0;
2078       else
2079         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2080
2081       return fold_convert (orig_type, expr);
2082
2083     case ARRAY_REF:
2084     case ARRAY_RANGE_REF:
2085       if (!inside_addr)
2086         return orig_expr;
2087
2088       step = array_ref_element_size (expr);
2089       if (!cst_and_fits_in_hwi (step))
2090         break;
2091
2092       st = int_cst_value (step);
2093       op1 = TREE_OPERAND (expr, 1);
2094       op1 = strip_offset_1 (op1, false, false, &off1);
2095       *offset = off1 * st;
2096
2097       if (top_compref
2098           && integer_zerop (op1))
2099         {
2100           /* Strip the component reference completely.  */
2101           op0 = TREE_OPERAND (expr, 0);
2102           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2103           *offset += off0;
2104           return op0;
2105         }
2106       break;
2107
2108     case COMPONENT_REF:
2109       if (!inside_addr)
2110         return orig_expr;
2111
2112       tmp = component_ref_field_offset (expr);
2113       if (top_compref
2114           && cst_and_fits_in_hwi (tmp))
2115         {
2116           /* Strip the component reference completely.  */
2117           op0 = TREE_OPERAND (expr, 0);
2118           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2119           *offset = off0 + int_cst_value (tmp);
2120           return op0;
2121         }
2122       break;
2123
2124     case ADDR_EXPR:
2125       op0 = TREE_OPERAND (expr, 0);
2126       op0 = strip_offset_1 (op0, true, true, &off0);
2127       *offset += off0;
2128
2129       if (op0 == TREE_OPERAND (expr, 0))
2130         return orig_expr;
2131
2132       expr = build_fold_addr_expr (op0);
2133       return fold_convert (orig_type, expr);
2134
2135     case MEM_REF:
2136       /* ???  Offset operand?  */
2137       inside_addr = false;
2138       break;
2139
2140     default:
2141       return orig_expr;
2142     }
2143
2144   /* Default handling of expressions for that we want to recurse into
2145      the first operand.  */
2146   op0 = TREE_OPERAND (expr, 0);
2147   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2148   *offset += off0;
2149
2150   if (op0 == TREE_OPERAND (expr, 0)
2151       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2152     return orig_expr;
2153
2154   expr = copy_node (expr);
2155   TREE_OPERAND (expr, 0) = op0;
2156   if (op1)
2157     TREE_OPERAND (expr, 1) = op1;
2158
2159   /* Inside address, we might strip the top level component references,
2160      thus changing type of the expression.  Handling of ADDR_EXPR
2161      will fix that.  */
2162   expr = fold_convert (orig_type, expr);
2163
2164   return expr;
2165 }
2166
2167 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2168
2169 static tree
2170 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2171 {
2172   return strip_offset_1 (expr, false, false, offset);
2173 }
2174
2175 /* Returns variant of TYPE that can be used as base for different uses.
2176    We return unsigned type with the same precision, which avoids problems
2177    with overflows.  */
2178
2179 static tree
2180 generic_type_for (tree type)
2181 {
2182   if (POINTER_TYPE_P (type))
2183     return unsigned_type_for (type);
2184
2185   if (TYPE_UNSIGNED (type))
2186     return type;
2187
2188   return unsigned_type_for (type);
2189 }
2190
2191 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2192    the bitmap to that we should store it.  */
2193
2194 static struct ivopts_data *fd_ivopts_data;
2195 static tree
2196 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2197 {
2198   bitmap *depends_on = (bitmap *) data;
2199   struct version_info *info;
2200
2201   if (TREE_CODE (*expr_p) != SSA_NAME)
2202     return NULL_TREE;
2203   info = name_info (fd_ivopts_data, *expr_p);
2204
2205   if (!info->inv_id || info->has_nonlin_use)
2206     return NULL_TREE;
2207
2208   if (!*depends_on)
2209     *depends_on = BITMAP_ALLOC (NULL);
2210   bitmap_set_bit (*depends_on, info->inv_id);
2211
2212   return NULL_TREE;
2213 }
2214
2215 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2216    position to POS.  If USE is not NULL, the candidate is set as related to
2217    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2218    replacement of the final value of the iv by a direct computation.  */
2219
2220 static struct iv_cand *
2221 add_candidate_1 (struct ivopts_data *data,
2222                  tree base, tree step, bool important, enum iv_position pos,
2223                  struct iv_use *use, gimple incremented_at)
2224 {
2225   unsigned i;
2226   struct iv_cand *cand = NULL;
2227   tree type, orig_type;
2228
2229   /* For non-original variables, make sure their values are computed in a type
2230      that does not invoke undefined behavior on overflows (since in general,
2231      we cannot prove that these induction variables are non-wrapping).  */
2232   if (pos != IP_ORIGINAL)
2233     {
2234       orig_type = TREE_TYPE (base);
2235       type = generic_type_for (orig_type);
2236       if (type != orig_type)
2237         {
2238           base = fold_convert (type, base);
2239           step = fold_convert (type, step);
2240         }
2241     }
2242
2243   for (i = 0; i < n_iv_cands (data); i++)
2244     {
2245       cand = iv_cand (data, i);
2246
2247       if (cand->pos != pos)
2248         continue;
2249
2250       if (cand->incremented_at != incremented_at
2251           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2252               && cand->ainc_use != use))
2253         continue;
2254
2255       if (!cand->iv)
2256         {
2257           if (!base && !step)
2258             break;
2259
2260           continue;
2261         }
2262
2263       if (!base && !step)
2264         continue;
2265
2266       if (operand_equal_p (base, cand->iv->base, 0)
2267           && operand_equal_p (step, cand->iv->step, 0)
2268           && (TYPE_PRECISION (TREE_TYPE (base))
2269               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2270         break;
2271     }
2272
2273   if (i == n_iv_cands (data))
2274     {
2275       cand = XCNEW (struct iv_cand);
2276       cand->id = i;
2277
2278       if (!base && !step)
2279         cand->iv = NULL;
2280       else
2281         cand->iv = alloc_iv (base, step);
2282
2283       cand->pos = pos;
2284       if (pos != IP_ORIGINAL && cand->iv)
2285         {
2286           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2287           cand->var_after = cand->var_before;
2288         }
2289       cand->important = important;
2290       cand->incremented_at = incremented_at;
2291       data->iv_candidates.safe_push (cand);
2292
2293       if (step
2294           && TREE_CODE (step) != INTEGER_CST)
2295         {
2296           fd_ivopts_data = data;
2297           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2298         }
2299
2300       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2301         cand->ainc_use = use;
2302       else
2303         cand->ainc_use = NULL;
2304
2305       if (dump_file && (dump_flags & TDF_DETAILS))
2306         dump_cand (dump_file, cand);
2307     }
2308
2309   if (important && !cand->important)
2310     {
2311       cand->important = true;
2312       if (dump_file && (dump_flags & TDF_DETAILS))
2313         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2314     }
2315
2316   if (use)
2317     {
2318       bitmap_set_bit (use->related_cands, i);
2319       if (dump_file && (dump_flags & TDF_DETAILS))
2320         fprintf (dump_file, "Candidate %d is related to use %d\n",
2321                  cand->id, use->id);
2322     }
2323
2324   return cand;
2325 }
2326
2327 /* Returns true if incrementing the induction variable at the end of the LOOP
2328    is allowed.
2329
2330    The purpose is to avoid splitting latch edge with a biv increment, thus
2331    creating a jump, possibly confusing other optimization passes and leaving
2332    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2333    is not available (so we do not have a better alternative), or if the latch
2334    edge is already nonempty.  */
2335
2336 static bool
2337 allow_ip_end_pos_p (struct loop *loop)
2338 {
2339   if (!ip_normal_pos (loop))
2340     return true;
2341
2342   if (!empty_block_p (ip_end_pos (loop)))
2343     return true;
2344
2345   return false;
2346 }
2347
2348 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2349    Important field is set to IMPORTANT.  */
2350
2351 static void
2352 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2353                         bool important, struct iv_use *use)
2354 {
2355   basic_block use_bb = gimple_bb (use->stmt);
2356   enum machine_mode mem_mode;
2357   unsigned HOST_WIDE_INT cstepi;
2358
2359   /* If we insert the increment in any position other than the standard
2360      ones, we must ensure that it is incremented once per iteration.
2361      It must not be in an inner nested loop, or one side of an if
2362      statement.  */
2363   if (use_bb->loop_father != data->current_loop
2364       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2365       || stmt_could_throw_p (use->stmt)
2366       || !cst_and_fits_in_hwi (step))
2367     return;
2368
2369   cstepi = int_cst_value (step);
2370
2371   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2372   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2373         || USE_STORE_PRE_INCREMENT (mem_mode))
2374        && GET_MODE_SIZE (mem_mode) == cstepi)
2375       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2376            || USE_STORE_PRE_DECREMENT (mem_mode))
2377           && GET_MODE_SIZE (mem_mode) == -cstepi))
2378     {
2379       enum tree_code code = MINUS_EXPR;
2380       tree new_base;
2381       tree new_step = step;
2382
2383       if (POINTER_TYPE_P (TREE_TYPE (base)))
2384         {
2385           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2386           code = POINTER_PLUS_EXPR;
2387         }
2388       else
2389         new_step = fold_convert (TREE_TYPE (base), new_step);
2390       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2391       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2392                        use->stmt);
2393     }
2394   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2395         || USE_STORE_POST_INCREMENT (mem_mode))
2396        && GET_MODE_SIZE (mem_mode) == cstepi)
2397       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2398            || USE_STORE_POST_DECREMENT (mem_mode))
2399           && GET_MODE_SIZE (mem_mode) == -cstepi))
2400     {
2401       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2402                        use->stmt);
2403     }
2404 }
2405
2406 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2407    position to POS.  If USE is not NULL, the candidate is set as related to
2408    it.  The candidate computation is scheduled on all available positions.  */
2409
2410 static void
2411 add_candidate (struct ivopts_data *data,
2412                tree base, tree step, bool important, struct iv_use *use)
2413 {
2414   if (ip_normal_pos (data->current_loop))
2415     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2416   if (ip_end_pos (data->current_loop)
2417       && allow_ip_end_pos_p (data->current_loop))
2418     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2419
2420   if (use != NULL && use->type == USE_ADDRESS)
2421     add_autoinc_candidates (data, base, step, important, use);
2422 }
2423
2424 /* Adds standard iv candidates.  */
2425
2426 static void
2427 add_standard_iv_candidates (struct ivopts_data *data)
2428 {
2429   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2430
2431   /* The same for a double-integer type if it is still fast enough.  */
2432   if (TYPE_PRECISION
2433         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2434       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2435     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2436                    build_int_cst (long_integer_type_node, 1), true, NULL);
2437
2438   /* The same for a double-integer type if it is still fast enough.  */
2439   if (TYPE_PRECISION
2440         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2441       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2442     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2443                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2444 }
2445
2446
2447 /* Adds candidates bases on the old induction variable IV.  */
2448
2449 static void
2450 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2451 {
2452   gimple phi;
2453   tree def;
2454   struct iv_cand *cand;
2455
2456   add_candidate (data, iv->base, iv->step, true, NULL);
2457
2458   /* The same, but with initial value zero.  */
2459   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2460     add_candidate (data, size_int (0), iv->step, true, NULL);
2461   else
2462     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2463                    iv->step, true, NULL);
2464
2465   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2466   if (gimple_code (phi) == GIMPLE_PHI)
2467     {
2468       /* Additionally record the possibility of leaving the original iv
2469          untouched.  */
2470       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2471       cand = add_candidate_1 (data,
2472                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2473                               SSA_NAME_DEF_STMT (def));
2474       cand->var_before = iv->ssa_name;
2475       cand->var_after = def;
2476     }
2477 }
2478
2479 /* Adds candidates based on the old induction variables.  */
2480
2481 static void
2482 add_old_ivs_candidates (struct ivopts_data *data)
2483 {
2484   unsigned i;
2485   struct iv *iv;
2486   bitmap_iterator bi;
2487
2488   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2489     {
2490       iv = ver_info (data, i)->iv;
2491       if (iv && iv->biv_p && !integer_zerop (iv->step))
2492         add_old_iv_candidates (data, iv);
2493     }
2494 }
2495
2496 /* Adds candidates based on the value of the induction variable IV and USE.  */
2497
2498 static void
2499 add_iv_value_candidates (struct ivopts_data *data,
2500                          struct iv *iv, struct iv_use *use)
2501 {
2502   unsigned HOST_WIDE_INT offset;
2503   tree base;
2504   tree basetype;
2505
2506   add_candidate (data, iv->base, iv->step, false, use);
2507
2508   /* The same, but with initial value zero.  Make such variable important,
2509      since it is generic enough so that possibly many uses may be based
2510      on it.  */
2511   basetype = TREE_TYPE (iv->base);
2512   if (POINTER_TYPE_P (basetype))
2513     basetype = sizetype;
2514   add_candidate (data, build_int_cst (basetype, 0),
2515                  iv->step, true, use);
2516
2517   /* Third, try removing the constant offset.  Make sure to even
2518      add a candidate for &a[0] vs. (T *)&a.  */
2519   base = strip_offset (iv->base, &offset);
2520   if (offset
2521       || base != iv->base)
2522     add_candidate (data, base, iv->step, false, use);
2523 }
2524
2525 /* Adds candidates based on the uses.  */
2526
2527 static void
2528 add_derived_ivs_candidates (struct ivopts_data *data)
2529 {
2530   unsigned i;
2531
2532   for (i = 0; i < n_iv_uses (data); i++)
2533     {
2534       struct iv_use *use = iv_use (data, i);
2535
2536       if (!use)
2537         continue;
2538
2539       switch (use->type)
2540         {
2541         case USE_NONLINEAR_EXPR:
2542         case USE_COMPARE:
2543         case USE_ADDRESS:
2544           /* Just add the ivs based on the value of the iv used here.  */
2545           add_iv_value_candidates (data, use->iv, use);
2546           break;
2547
2548         default:
2549           gcc_unreachable ();
2550         }
2551     }
2552 }
2553
2554 /* Record important candidates and add them to related_cands bitmaps
2555    if needed.  */
2556
2557 static void
2558 record_important_candidates (struct ivopts_data *data)
2559 {
2560   unsigned i;
2561   struct iv_use *use;
2562
2563   for (i = 0; i < n_iv_cands (data); i++)
2564     {
2565       struct iv_cand *cand = iv_cand (data, i);
2566
2567       if (cand->important)
2568         bitmap_set_bit (data->important_candidates, i);
2569     }
2570
2571   data->consider_all_candidates = (n_iv_cands (data)
2572                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2573
2574   if (data->consider_all_candidates)
2575     {
2576       /* We will not need "related_cands" bitmaps in this case,
2577          so release them to decrease peak memory consumption.  */
2578       for (i = 0; i < n_iv_uses (data); i++)
2579         {
2580           use = iv_use (data, i);
2581           BITMAP_FREE (use->related_cands);
2582         }
2583     }
2584   else
2585     {
2586       /* Add important candidates to the related_cands bitmaps.  */
2587       for (i = 0; i < n_iv_uses (data); i++)
2588         bitmap_ior_into (iv_use (data, i)->related_cands,
2589                          data->important_candidates);
2590     }
2591 }
2592
2593 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2594    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2595    we allocate a simple list to every use.  */
2596
2597 static void
2598 alloc_use_cost_map (struct ivopts_data *data)
2599 {
2600   unsigned i, size, s;
2601
2602   for (i = 0; i < n_iv_uses (data); i++)
2603     {
2604       struct iv_use *use = iv_use (data, i);
2605
2606       if (data->consider_all_candidates)
2607         size = n_iv_cands (data);
2608       else
2609         {
2610           s = bitmap_count_bits (use->related_cands);
2611
2612           /* Round up to the power of two, so that moduling by it is fast.  */
2613           size = s ? (1 << ceil_log2 (s)) : 1;
2614         }
2615
2616       use->n_map_members = size;
2617       use->cost_map = XCNEWVEC (struct cost_pair, size);
2618     }
2619 }
2620
2621 /* Returns description of computation cost of expression whose runtime
2622    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2623
2624 static comp_cost
2625 new_cost (unsigned runtime, unsigned complexity)
2626 {
2627   comp_cost cost;
2628
2629   cost.cost = runtime;
2630   cost.complexity = complexity;
2631
2632   return cost;
2633 }
2634
2635 /* Adds costs COST1 and COST2.  */
2636
2637 static comp_cost
2638 add_costs (comp_cost cost1, comp_cost cost2)
2639 {
2640   cost1.cost += cost2.cost;
2641   cost1.complexity += cost2.complexity;
2642
2643   return cost1;
2644 }
2645 /* Subtracts costs COST1 and COST2.  */
2646
2647 static comp_cost
2648 sub_costs (comp_cost cost1, comp_cost cost2)
2649 {
2650   cost1.cost -= cost2.cost;
2651   cost1.complexity -= cost2.complexity;
2652
2653   return cost1;
2654 }
2655
2656 /* Returns a negative number if COST1 < COST2, a positive number if
2657    COST1 > COST2, and 0 if COST1 = COST2.  */
2658
2659 static int
2660 compare_costs (comp_cost cost1, comp_cost cost2)
2661 {
2662   if (cost1.cost == cost2.cost)
2663     return cost1.complexity - cost2.complexity;
2664
2665   return cost1.cost - cost2.cost;
2666 }
2667
2668 /* Returns true if COST is infinite.  */
2669
2670 static bool
2671 infinite_cost_p (comp_cost cost)
2672 {
2673   return cost.cost == INFTY;
2674 }
2675
2676 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2677    on invariants DEPENDS_ON and that the value used in expressing it
2678    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2679
2680 static void
2681 set_use_iv_cost (struct ivopts_data *data,
2682                  struct iv_use *use, struct iv_cand *cand,
2683                  comp_cost cost, bitmap depends_on, tree value,
2684                  enum tree_code comp, int inv_expr_id)
2685 {
2686   unsigned i, s;
2687
2688   if (infinite_cost_p (cost))
2689     {
2690       BITMAP_FREE (depends_on);
2691       return;
2692     }
2693
2694   if (data->consider_all_candidates)
2695     {
2696       use->cost_map[cand->id].cand = cand;
2697       use->cost_map[cand->id].cost = cost;
2698       use->cost_map[cand->id].depends_on = depends_on;
2699       use->cost_map[cand->id].value = value;
2700       use->cost_map[cand->id].comp = comp;
2701       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2702       return;
2703     }
2704
2705   /* n_map_members is a power of two, so this computes modulo.  */
2706   s = cand->id & (use->n_map_members - 1);
2707   for (i = s; i < use->n_map_members; i++)
2708     if (!use->cost_map[i].cand)
2709       goto found;
2710   for (i = 0; i < s; i++)
2711     if (!use->cost_map[i].cand)
2712       goto found;
2713
2714   gcc_unreachable ();
2715
2716 found:
2717   use->cost_map[i].cand = cand;
2718   use->cost_map[i].cost = cost;
2719   use->cost_map[i].depends_on = depends_on;
2720   use->cost_map[i].value = value;
2721   use->cost_map[i].comp = comp;
2722   use->cost_map[i].inv_expr_id = inv_expr_id;
2723 }
2724
2725 /* Gets cost of (USE, CANDIDATE) pair.  */
2726
2727 static struct cost_pair *
2728 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2729                  struct iv_cand *cand)
2730 {
2731   unsigned i, s;
2732   struct cost_pair *ret;
2733
2734   if (!cand)
2735     return NULL;
2736
2737   if (data->consider_all_candidates)
2738     {
2739       ret = use->cost_map + cand->id;
2740       if (!ret->cand)
2741         return NULL;
2742
2743       return ret;
2744     }
2745
2746   /* n_map_members is a power of two, so this computes modulo.  */
2747   s = cand->id & (use->n_map_members - 1);
2748   for (i = s; i < use->n_map_members; i++)
2749     if (use->cost_map[i].cand == cand)
2750       return use->cost_map + i;
2751     else if (use->cost_map[i].cand == NULL)
2752       return NULL;
2753   for (i = 0; i < s; i++)
2754     if (use->cost_map[i].cand == cand)
2755       return use->cost_map + i;
2756     else if (use->cost_map[i].cand == NULL)
2757       return NULL;
2758
2759   return NULL;
2760 }
2761
2762 /* Returns estimate on cost of computing SEQ.  */
2763
2764 static unsigned
2765 seq_cost (rtx seq, bool speed)
2766 {
2767   unsigned cost = 0;
2768   rtx set;
2769
2770   for (; seq; seq = NEXT_INSN (seq))
2771     {
2772       set = single_set (seq);
2773       if (set)
2774         cost += set_src_cost (SET_SRC (set), speed);
2775       else
2776         cost++;
2777     }
2778
2779   return cost;
2780 }
2781
2782 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2783 static rtx
2784 produce_memory_decl_rtl (tree obj, int *regno)
2785 {
2786   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2787   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2788   rtx x;
2789
2790   gcc_assert (obj);
2791   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2792     {
2793       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2794       x = gen_rtx_SYMBOL_REF (address_mode, name);
2795       SET_SYMBOL_REF_DECL (x, obj);
2796       x = gen_rtx_MEM (DECL_MODE (obj), x);
2797       set_mem_addr_space (x, as);
2798       targetm.encode_section_info (obj, x, true);
2799     }
2800   else
2801     {
2802       x = gen_raw_REG (address_mode, (*regno)++);
2803       x = gen_rtx_MEM (DECL_MODE (obj), x);
2804       set_mem_addr_space (x, as);
2805     }
2806
2807   return x;
2808 }
2809
2810 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2811    walk_tree.  DATA contains the actual fake register number.  */
2812
2813 static tree
2814 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2815 {
2816   tree obj = NULL_TREE;
2817   rtx x = NULL_RTX;
2818   int *regno = (int *) data;
2819
2820   switch (TREE_CODE (*expr_p))
2821     {
2822     case ADDR_EXPR:
2823       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2824            handled_component_p (*expr_p);
2825            expr_p = &TREE_OPERAND (*expr_p, 0))
2826         continue;
2827       obj = *expr_p;
2828       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2829         x = produce_memory_decl_rtl (obj, regno);
2830       break;
2831
2832     case SSA_NAME:
2833       *ws = 0;
2834       obj = SSA_NAME_VAR (*expr_p);
2835       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2836       if (!obj)
2837         return NULL_TREE;
2838       if (!DECL_RTL_SET_P (obj))
2839         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2840       break;
2841
2842     case VAR_DECL:
2843     case PARM_DECL:
2844     case RESULT_DECL:
2845       *ws = 0;
2846       obj = *expr_p;
2847
2848       if (DECL_RTL_SET_P (obj))
2849         break;
2850
2851       if (DECL_MODE (obj) == BLKmode)
2852         x = produce_memory_decl_rtl (obj, regno);
2853       else
2854         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2855
2856       break;
2857
2858     default:
2859       break;
2860     }
2861
2862   if (x)
2863     {
2864       decl_rtl_to_reset.safe_push (obj);
2865       SET_DECL_RTL (obj, x);
2866     }
2867
2868   return NULL_TREE;
2869 }
2870
2871 /* Determines cost of the computation of EXPR.  */
2872
2873 static unsigned
2874 computation_cost (tree expr, bool speed)
2875 {
2876   rtx seq, rslt;
2877   tree type = TREE_TYPE (expr);
2878   unsigned cost;
2879   /* Avoid using hard regs in ways which may be unsupported.  */
2880   int regno = LAST_VIRTUAL_REGISTER + 1;
2881   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2882   enum node_frequency real_frequency = node->frequency;
2883
2884   node->frequency = NODE_FREQUENCY_NORMAL;
2885   crtl->maybe_hot_insn_p = speed;
2886   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2887   start_sequence ();
2888   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2889   seq = get_insns ();
2890   end_sequence ();
2891   default_rtl_profile ();
2892   node->frequency = real_frequency;
2893
2894   cost = seq_cost (seq, speed);
2895   if (MEM_P (rslt))
2896     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2897                           TYPE_ADDR_SPACE (type), speed);
2898   else if (!REG_P (rslt))
2899     cost += set_src_cost (rslt, speed);
2900
2901   return cost;
2902 }
2903
2904 /* Returns variable containing the value of candidate CAND at statement AT.  */
2905
2906 static tree
2907 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2908 {
2909   if (stmt_after_increment (loop, cand, stmt))
2910     return cand->var_after;
2911   else
2912     return cand->var_before;
2913 }
2914
2915 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2916    same precision that is at least as wide as the precision of TYPE, stores
2917    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2918    type of A and B.  */
2919
2920 static tree
2921 determine_common_wider_type (tree *a, tree *b)
2922 {
2923   tree wider_type = NULL;
2924   tree suba, subb;
2925   tree atype = TREE_TYPE (*a);
2926
2927   if (CONVERT_EXPR_P (*a))
2928     {
2929       suba = TREE_OPERAND (*a, 0);
2930       wider_type = TREE_TYPE (suba);
2931       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2932         return atype;
2933     }
2934   else
2935     return atype;
2936
2937   if (CONVERT_EXPR_P (*b))
2938     {
2939       subb = TREE_OPERAND (*b, 0);
2940       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2941         return atype;
2942     }
2943   else
2944     return atype;
2945
2946   *a = suba;
2947   *b = subb;
2948   return wider_type;
2949 }
2950
2951 /* Determines the expression by that USE is expressed from induction variable
2952    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2953    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2954
2955 static bool
2956 get_computation_aff (struct loop *loop,
2957                      struct iv_use *use, struct iv_cand *cand, gimple at,
2958                      struct affine_tree_combination *aff)
2959 {
2960   tree ubase = use->iv->base;
2961   tree ustep = use->iv->step;
2962   tree cbase = cand->iv->base;
2963   tree cstep = cand->iv->step, cstep_common;
2964   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2965   tree common_type, var;
2966   tree uutype;
2967   aff_tree cbase_aff, var_aff;
2968   double_int rat;
2969
2970   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2971     {
2972       /* We do not have a precision to express the values of use.  */
2973       return false;
2974     }
2975
2976   var = var_at_stmt (loop, cand, at);
2977   uutype = unsigned_type_for (utype);
2978
2979   /* If the conversion is not noop, perform it.  */
2980   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2981     {
2982       cstep = fold_convert (uutype, cstep);
2983       cbase = fold_convert (uutype, cbase);
2984       var = fold_convert (uutype, var);
2985     }
2986
2987   if (!constant_multiple_of (ustep, cstep, &rat))
2988     return false;
2989
2990   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2991      type, we achieve better folding by computing their difference in this
2992      wider type, and cast the result to UUTYPE.  We do not need to worry about
2993      overflows, as all the arithmetics will in the end be performed in UUTYPE
2994      anyway.  */
2995   common_type = determine_common_wider_type (&ubase, &cbase);
2996
2997   /* use = ubase - ratio * cbase + ratio * var.  */
2998   tree_to_aff_combination (ubase, common_type, aff);
2999   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3000   tree_to_aff_combination (var, uutype, &var_aff);
3001
3002   /* We need to shift the value if we are after the increment.  */
3003   if (stmt_after_increment (loop, cand, at))
3004     {
3005       aff_tree cstep_aff;
3006
3007       if (common_type != uutype)
3008         cstep_common = fold_convert (common_type, cstep);
3009       else
3010         cstep_common = cstep;
3011
3012       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3013       aff_combination_add (&cbase_aff, &cstep_aff);
3014     }
3015
3016   aff_combination_scale (&cbase_aff, -rat);
3017   aff_combination_add (aff, &cbase_aff);
3018   if (common_type != uutype)
3019     aff_combination_convert (aff, uutype);
3020
3021   aff_combination_scale (&var_aff, rat);
3022   aff_combination_add (aff, &var_aff);
3023
3024   return true;
3025 }
3026
3027 /* Return the type of USE.  */
3028
3029 static tree
3030 get_use_type (struct iv_use *use)
3031 {
3032   tree base_type = TREE_TYPE (use->iv->base);
3033   tree type;
3034
3035   if (use->type == USE_ADDRESS)
3036     {
3037       /* The base_type may be a void pointer.  Create a pointer type based on
3038          the mem_ref instead.  */
3039       type = build_pointer_type (TREE_TYPE (*use->op_p));
3040       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3041                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3042     }
3043   else
3044     type = base_type;
3045
3046   return type;
3047 }
3048
3049 /* Determines the expression by that USE is expressed from induction variable
3050    CAND at statement AT in LOOP.  The computation is unshared.  */
3051
3052 static tree
3053 get_computation_at (struct loop *loop,
3054                     struct iv_use *use, struct iv_cand *cand, gimple at)
3055 {
3056   aff_tree aff;
3057   tree type = get_use_type (use);
3058
3059   if (!get_computation_aff (loop, use, cand, at, &aff))
3060     return NULL_TREE;
3061   unshare_aff_combination (&aff);
3062   return fold_convert (type, aff_combination_to_tree (&aff));
3063 }
3064
3065 /* Determines the expression by that USE is expressed from induction variable
3066    CAND in LOOP.  The computation is unshared.  */
3067
3068 static tree
3069 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3070 {
3071   return get_computation_at (loop, use, cand, use->stmt);
3072 }
3073
3074 /* Adjust the cost COST for being in loop setup rather than loop body.
3075    If we're optimizing for space, the loop setup overhead is constant;
3076    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3077 static unsigned
3078 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3079 {
3080   if (cost == INFTY)
3081     return cost;
3082   else if (optimize_loop_for_speed_p (data->current_loop))
3083     return cost / avg_loop_niter (data->current_loop);
3084   else
3085     return cost;
3086 }
3087
3088 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3089    validity for a memory reference accessing memory of mode MODE in
3090    address space AS.  */
3091
3092
3093 bool
3094 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3095                                  addr_space_t as)
3096 {
3097 #define MAX_RATIO 128
3098   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3099   static vec<sbitmap> valid_mult_list;
3100   sbitmap valid_mult;
3101
3102   if (data_index >= valid_mult_list.length ())
3103     valid_mult_list.safe_grow_cleared (data_index + 1);
3104
3105   valid_mult = valid_mult_list[data_index];
3106   if (!valid_mult)
3107     {
3108       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3109       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3110       rtx addr;
3111       HOST_WIDE_INT i;
3112
3113       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3114       bitmap_clear (valid_mult);
3115       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3116       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3117         {
3118           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3119           if (memory_address_addr_space_p (mode, addr, as))
3120             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3121         }
3122
3123       if (dump_file && (dump_flags & TDF_DETAILS))
3124         {
3125           fprintf (dump_file, "  allowed multipliers:");
3126           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3127             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3128               fprintf (dump_file, " %d", (int) i);
3129           fprintf (dump_file, "\n");
3130           fprintf (dump_file, "\n");
3131         }
3132
3133       valid_mult_list[data_index] = valid_mult;
3134     }
3135
3136   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3137     return false;
3138
3139   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3140 }
3141
3142 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3143    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3144    variable is omitted.  Compute the cost for a memory reference that accesses
3145    a memory location of mode MEM_MODE in address space AS.
3146
3147    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3148    size of MEM_MODE / RATIO) is available.  To make this determination, we
3149    look at the size of the increment to be made, which is given in CSTEP.
3150    CSTEP may be zero if the step is unknown.
3151    STMT_AFTER_INC is true iff the statement we're looking at is after the
3152    increment of the original biv.
3153
3154    TODO -- there must be some better way.  This all is quite crude.  */
3155
3156 typedef struct address_cost_data_s
3157 {
3158   HOST_WIDE_INT min_offset, max_offset;
3159   unsigned costs[2][2][2][2];
3160 } *address_cost_data;
3161
3162
3163 static comp_cost
3164 get_address_cost (bool symbol_present, bool var_present,
3165                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3166                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3167                   addr_space_t as, bool speed,
3168                   bool stmt_after_inc, bool *may_autoinc)
3169 {
3170   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3171   static vec<address_cost_data> address_cost_data_list;
3172   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3173   address_cost_data data;
3174   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3175   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3176   unsigned cost, acost, complexity;
3177   bool offset_p, ratio_p, autoinc;
3178   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3179   unsigned HOST_WIDE_INT mask;
3180   unsigned bits;
3181
3182   if (data_index >= address_cost_data_list.length ())
3183     address_cost_data_list.safe_grow_cleared (data_index + 1);
3184
3185   data = address_cost_data_list[data_index];
3186   if (!data)
3187     {
3188       HOST_WIDE_INT i;
3189       HOST_WIDE_INT rat, off = 0;
3190       int old_cse_not_expected, width;
3191       unsigned sym_p, var_p, off_p, rat_p, add_c;
3192       rtx seq, addr, base;
3193       rtx reg0, reg1;
3194
3195       data = (address_cost_data) xcalloc (1, sizeof (*data));
3196
3197       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3198
3199       width = GET_MODE_BITSIZE (address_mode) - 1;
3200       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3201         width = HOST_BITS_PER_WIDE_INT - 1;
3202       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3203
3204       for (i = width; i >= 0; i--)
3205         {
3206           off = -((unsigned HOST_WIDE_INT) 1 << i);
3207           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3208           if (memory_address_addr_space_p (mem_mode, addr, as))
3209             break;
3210         }
3211       data->min_offset = (i == -1? 0 : off);
3212
3213       for (i = width; i >= 0; i--)
3214         {
3215           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3216           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3217           if (memory_address_addr_space_p (mem_mode, addr, as))
3218             break;
3219         }
3220       if (i == -1)
3221         off = 0;
3222       data->max_offset = off;
3223
3224       if (dump_file && (dump_flags & TDF_DETAILS))
3225         {
3226           fprintf (dump_file, "get_address_cost:\n");
3227           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3228                    GET_MODE_NAME (mem_mode),
3229                    data->min_offset);
3230           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3231                    GET_MODE_NAME (mem_mode),
3232                    data->max_offset);
3233         }
3234
3235       rat = 1;
3236       for (i = 2; i <= MAX_RATIO; i++)
3237         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3238           {
3239             rat = i;
3240             break;
3241           }
3242
3243       /* Compute the cost of various addressing modes.  */
3244       acost = 0;
3245       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3246       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3247
3248       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3249           || USE_STORE_PRE_DECREMENT (mem_mode))
3250         {
3251           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3252           has_predec[mem_mode]
3253             = memory_address_addr_space_p (mem_mode, addr, as);
3254         }
3255       if (USE_LOAD_POST_DECREMENT (mem_mode)
3256           || USE_STORE_POST_DECREMENT (mem_mode))
3257         {
3258           addr = gen_rtx_POST_DEC (address_mode, reg0);
3259           has_postdec[mem_mode]
3260             = memory_address_addr_space_p (mem_mode, addr, as);
3261         }
3262       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3263           || USE_STORE_PRE_DECREMENT (mem_mode))
3264         {
3265           addr = gen_rtx_PRE_INC (address_mode, reg0);
3266           has_preinc[mem_mode]
3267             = memory_address_addr_space_p (mem_mode, addr, as);
3268         }
3269       if (USE_LOAD_POST_INCREMENT (mem_mode)
3270           || USE_STORE_POST_INCREMENT (mem_mode))
3271         {
3272           addr = gen_rtx_POST_INC (address_mode, reg0);
3273           has_postinc[mem_mode]
3274             = memory_address_addr_space_p (mem_mode, addr, as);
3275         }
3276       for (i = 0; i < 16; i++)
3277         {
3278           sym_p = i & 1;
3279           var_p = (i >> 1) & 1;
3280           off_p = (i >> 2) & 1;
3281           rat_p = (i >> 3) & 1;
3282
3283           addr = reg0;
3284           if (rat_p)
3285             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3286                                    gen_int_mode (rat, address_mode));
3287
3288           if (var_p)
3289             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3290
3291           if (sym_p)
3292             {
3293               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3294               /* ??? We can run into trouble with some backends by presenting
3295                  it with symbols which haven't been properly passed through
3296                  targetm.encode_section_info.  By setting the local bit, we
3297                  enhance the probability of things working.  */
3298               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3299
3300               if (off_p)
3301                 base = gen_rtx_fmt_e (CONST, address_mode,
3302                                       gen_rtx_fmt_ee
3303                                         (PLUS, address_mode, base,
3304                                          gen_int_mode (off, address_mode)));
3305             }
3306           else if (off_p)
3307             base = gen_int_mode (off, address_mode);
3308           else
3309             base = NULL_RTX;
3310
3311           if (base)
3312             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3313
3314           start_sequence ();
3315           /* To avoid splitting addressing modes, pretend that no cse will
3316              follow.  */
3317           old_cse_not_expected = cse_not_expected;
3318           cse_not_expected = true;
3319           addr = memory_address_addr_space (mem_mode, addr, as);
3320           cse_not_expected = old_cse_not_expected;
3321           seq = get_insns ();
3322           end_sequence ();
3323
3324           acost = seq_cost (seq, speed);
3325           acost += address_cost (addr, mem_mode, as, speed);
3326
3327           if (!acost)
3328             acost = 1;
3329           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3330         }
3331
3332       /* On some targets, it is quite expensive to load symbol to a register,
3333          which makes addresses that contain symbols look much more expensive.
3334          However, the symbol will have to be loaded in any case before the
3335          loop (and quite likely we have it in register already), so it does not
3336          make much sense to penalize them too heavily.  So make some final
3337          tweaks for the SYMBOL_PRESENT modes:
3338
3339          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3340          var is cheaper, use this mode with small penalty.
3341          If VAR_PRESENT is true, try whether the mode with
3342          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3343          if this is the case, use it.  */
3344       add_c = add_cost (speed, address_mode);
3345       for (i = 0; i < 8; i++)
3346         {
3347           var_p = i & 1;
3348           off_p = (i >> 1) & 1;
3349           rat_p = (i >> 2) & 1;
3350
3351           acost = data->costs[0][1][off_p][rat_p] + 1;
3352           if (var_p)
3353             acost += add_c;
3354
3355           if (acost < data->costs[1][var_p][off_p][rat_p])
3356             data->costs[1][var_p][off_p][rat_p] = acost;
3357         }
3358
3359       if (dump_file && (dump_flags & TDF_DETAILS))
3360         {
3361           fprintf (dump_file, "Address costs:\n");
3362
3363           for (i = 0; i < 16; i++)
3364             {
3365               sym_p = i & 1;
3366               var_p = (i >> 1) & 1;
3367               off_p = (i >> 2) & 1;
3368               rat_p = (i >> 3) & 1;
3369
3370               fprintf (dump_file, "  ");
3371               if (sym_p)
3372                 fprintf (dump_file, "sym + ");
3373               if (var_p)
3374                 fprintf (dump_file, "var + ");
3375               if (off_p)
3376                 fprintf (dump_file, "cst + ");
3377               if (rat_p)
3378                 fprintf (dump_file, "rat * ");
3379
3380               acost = data->costs[sym_p][var_p][off_p][rat_p];
3381               fprintf (dump_file, "index costs %d\n", acost);
3382             }
3383           if (has_predec[mem_mode] || has_postdec[mem_mode]
3384               || has_preinc[mem_mode] || has_postinc[mem_mode])
3385             fprintf (dump_file, "  May include autoinc/dec\n");
3386           fprintf (dump_file, "\n");
3387         }
3388
3389       address_cost_data_list[data_index] = data;
3390     }
3391
3392   bits = GET_MODE_BITSIZE (address_mode);
3393   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3394   offset &= mask;
3395   if ((offset >> (bits - 1) & 1))
3396     offset |= ~mask;
3397   s_offset = offset;
3398
3399   autoinc = false;
3400   msize = GET_MODE_SIZE (mem_mode);
3401   autoinc_offset = offset;
3402   if (stmt_after_inc)
3403     autoinc_offset += ratio * cstep;
3404   if (symbol_present || var_present || ratio != 1)
3405     autoinc = false;
3406   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3407                && msize == cstep)
3408            || (has_postdec[mem_mode] && autoinc_offset == 0
3409                && msize == -cstep)
3410            || (has_preinc[mem_mode] && autoinc_offset == msize
3411                && msize == cstep)
3412            || (has_predec[mem_mode] && autoinc_offset == -msize
3413                && msize == -cstep))
3414     autoinc = true;
3415
3416   cost = 0;
3417   offset_p = (s_offset != 0
3418               && data->min_offset <= s_offset
3419               && s_offset <= data->max_offset);
3420   ratio_p = (ratio != 1
3421              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3422
3423   if (ratio != 1 && !ratio_p)
3424     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3425
3426   if (s_offset && !offset_p && !symbol_present)
3427     cost += add_cost (speed, address_mode);
3428
3429   if (may_autoinc)
3430     *may_autoinc = autoinc;
3431   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3432   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3433   return new_cost (cost + acost, complexity);
3434 }
3435
3436  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3437     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3438     calculating the operands of EXPR.  Returns true if successful, and returns
3439     the cost in COST.  */
3440
3441 static bool
3442 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3443                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3444 {
3445   comp_cost res;
3446   tree op1 = TREE_OPERAND (expr, 1);
3447   tree cst = TREE_OPERAND (mult, 1);
3448   tree multop = TREE_OPERAND (mult, 0);
3449   int m = exact_log2 (int_cst_value (cst));
3450   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3451   int sa_cost;
3452
3453   if (!(m >= 0 && m < maxm))
3454     return false;
3455
3456   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3457              ? shiftadd_cost (speed, mode, m)
3458              : (mult == op1
3459                 ? shiftsub1_cost (speed, mode, m)
3460                 : shiftsub0_cost (speed, mode, m)));
3461   res = new_cost (sa_cost, 0);
3462   res = add_costs (res, mult == op1 ? cost0 : cost1);
3463
3464   STRIP_NOPS (multop);
3465   if (!is_gimple_val (multop))
3466     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3467
3468   *cost = res;
3469   return true;
3470 }
3471
3472 /* Estimates cost of forcing expression EXPR into a variable.  */
3473
3474 static comp_cost
3475 force_expr_to_var_cost (tree expr, bool speed)
3476 {
3477   static bool costs_initialized = false;
3478   static unsigned integer_cost [2];
3479   static unsigned symbol_cost [2];
3480   static unsigned address_cost [2];
3481   tree op0, op1;
3482   comp_cost cost0, cost1, cost;
3483   enum machine_mode mode;
3484
3485   if (!costs_initialized)
3486     {
3487       tree type = build_pointer_type (integer_type_node);
3488       tree var, addr;
3489       rtx x;
3490       int i;
3491
3492       var = create_tmp_var_raw (integer_type_node, "test_var");
3493       TREE_STATIC (var) = 1;
3494       x = produce_memory_decl_rtl (var, NULL);
3495       SET_DECL_RTL (var, x);
3496
3497       addr = build1 (ADDR_EXPR, type, var);
3498
3499
3500       for (i = 0; i < 2; i++)
3501         {
3502           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3503                                                              2000), i);
3504
3505           symbol_cost[i] = computation_cost (addr, i) + 1;
3506
3507           address_cost[i]
3508             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3509           if (dump_file && (dump_flags & TDF_DETAILS))
3510             {
3511               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3512               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3513               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3514               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3515               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3516               fprintf (dump_file, "\n");
3517             }
3518         }
3519
3520       costs_initialized = true;
3521     }
3522
3523   STRIP_NOPS (expr);
3524
3525   if (SSA_VAR_P (expr))
3526     return no_cost;
3527
3528   if (is_gimple_min_invariant (expr))
3529     {
3530       if (TREE_CODE (expr) == INTEGER_CST)
3531         return new_cost (integer_cost [speed], 0);
3532
3533       if (TREE_CODE (expr) == ADDR_EXPR)
3534         {
3535           tree obj = TREE_OPERAND (expr, 0);
3536
3537           if (TREE_CODE (obj) == VAR_DECL
3538               || TREE_CODE (obj) == PARM_DECL
3539               || TREE_CODE (obj) == RESULT_DECL)
3540             return new_cost (symbol_cost [speed], 0);
3541         }
3542
3543       return new_cost (address_cost [speed], 0);
3544     }
3545
3546   switch (TREE_CODE (expr))
3547     {
3548     case POINTER_PLUS_EXPR:
3549     case PLUS_EXPR:
3550     case MINUS_EXPR:
3551     case MULT_EXPR:
3552       op0 = TREE_OPERAND (expr, 0);
3553       op1 = TREE_OPERAND (expr, 1);
3554       STRIP_NOPS (op0);
3555       STRIP_NOPS (op1);
3556
3557       if (is_gimple_val (op0))
3558         cost0 = no_cost;
3559       else
3560         cost0 = force_expr_to_var_cost (op0, speed);
3561
3562       if (is_gimple_val (op1))
3563         cost1 = no_cost;
3564       else
3565         cost1 = force_expr_to_var_cost (op1, speed);
3566
3567       break;
3568
3569     case NEGATE_EXPR:
3570       op0 = TREE_OPERAND (expr, 0);
3571       STRIP_NOPS (op0);
3572       op1 = NULL_TREE;
3573
3574       if (is_gimple_val (op0))
3575         cost0 = no_cost;
3576       else
3577         cost0 = force_expr_to_var_cost (op0, speed);
3578
3579       cost1 = no_cost;
3580       break;
3581
3582     default:
3583       /* Just an arbitrary value, FIXME.  */
3584       return new_cost (target_spill_cost[speed], 0);
3585     }
3586
3587   mode = TYPE_MODE (TREE_TYPE (expr));
3588   switch (TREE_CODE (expr))
3589     {
3590     case POINTER_PLUS_EXPR:
3591     case PLUS_EXPR:
3592     case MINUS_EXPR:
3593     case NEGATE_EXPR:
3594       cost = new_cost (add_cost (speed, mode), 0);
3595       if (TREE_CODE (expr) != NEGATE_EXPR)
3596         {
3597           tree mult = NULL_TREE;
3598           comp_cost sa_cost;
3599           if (TREE_CODE (op1) == MULT_EXPR)
3600             mult = op1;
3601           else if (TREE_CODE (op0) == MULT_EXPR)
3602             mult = op0;
3603
3604           if (mult != NULL_TREE
3605               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3606               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3607                                     speed, &sa_cost))
3608             return sa_cost;
3609         }
3610       break;
3611
3612     case MULT_EXPR:
3613       if (cst_and_fits_in_hwi (op0))
3614         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3615                                              mode, speed), 0);
3616       else if (cst_and_fits_in_hwi (op1))
3617         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3618                                              mode, speed), 0);
3619       else
3620         return new_cost (target_spill_cost [speed], 0);
3621       break;
3622
3623     default:
3624       gcc_unreachable ();
3625     }
3626
3627   cost = add_costs (cost, cost0);
3628   cost = add_costs (cost, cost1);
3629
3630   /* Bound the cost by target_spill_cost.  The parts of complicated
3631      computations often are either loop invariant or at least can
3632      be shared between several iv uses, so letting this grow without
3633      limits would not give reasonable results.  */
3634   if (cost.cost > (int) target_spill_cost [speed])
3635     cost.cost = target_spill_cost [speed];
3636
3637   return cost;
3638 }
3639
3640 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3641    invariants the computation depends on.  */
3642
3643 static comp_cost
3644 force_var_cost (struct ivopts_data *data,
3645                 tree expr, bitmap *depends_on)
3646 {
3647   if (depends_on)
3648     {
3649       fd_ivopts_data = data;
3650       walk_tree (&expr, find_depends, depends_on, NULL);
3651     }
3652
3653   return force_expr_to_var_cost (expr, data->speed);
3654 }
3655
3656 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3657    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3658    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3659    invariants the computation depends on.  */
3660
3661 static comp_cost
3662 split_address_cost (struct ivopts_data *data,
3663                     tree addr, bool *symbol_present, bool *var_present,
3664                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3665 {
3666   tree core;
3667   HOST_WIDE_INT bitsize;
3668   HOST_WIDE_INT bitpos;
3669   tree toffset;
3670   enum machine_mode mode;
3671   int unsignedp, volatilep;
3672
3673   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3674                               &unsignedp, &volatilep, false);
3675
3676   if (toffset != 0
3677       || bitpos % BITS_PER_UNIT != 0
3678       || TREE_CODE (core) != VAR_DECL)
3679     {
3680       *symbol_present = false;
3681       *var_present = true;
3682       fd_ivopts_data = data;
3683       walk_tree (&addr, find_depends, depends_on, NULL);
3684       return new_cost (target_spill_cost[data->speed], 0);
3685     }
3686
3687   *offset += bitpos / BITS_PER_UNIT;
3688   if (TREE_STATIC (core)
3689       || DECL_EXTERNAL (core))
3690     {
3691       *symbol_present = true;
3692       *var_present = false;
3693       return no_cost;
3694     }
3695
3696   *symbol_present = false;
3697   *var_present = true;
3698   return no_cost;
3699 }
3700
3701 /* Estimates cost of expressing difference of addresses E1 - E2 as
3702    var + symbol + offset.  The value of offset is added to OFFSET,
3703    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3704    part is missing.  DEPENDS_ON is a set of the invariants the computation
3705    depends on.  */
3706
3707 static comp_cost
3708 ptr_difference_cost (struct ivopts_data *data,
3709                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3710                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3711 {
3712   HOST_WIDE_INT diff = 0;
3713   aff_tree aff_e1, aff_e2;
3714   tree type;
3715
3716   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3717
3718   if (ptr_difference_const (e1, e2, &diff))
3719     {
3720       *offset += diff;
3721       *symbol_present = false;
3722       *var_present = false;
3723       return no_cost;
3724     }
3725
3726   if (integer_zerop (e2))
3727     return split_address_cost (data, TREE_OPERAND (e1, 0),
3728                                symbol_present, var_present, offset, depends_on);
3729
3730   *symbol_present = false;
3731   *var_present = true;
3732
3733   type = signed_type_for (TREE_TYPE (e1));
3734   tree_to_aff_combination (e1, type, &aff_e1);
3735   tree_to_aff_combination (e2, type, &aff_e2);
3736   aff_combination_scale (&aff_e2, double_int_minus_one);
3737   aff_combination_add (&aff_e1, &aff_e2);
3738
3739   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3740 }
3741
3742 /* Estimates cost of expressing difference E1 - E2 as
3743    var + symbol + offset.  The value of offset is added to OFFSET,
3744    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3745    part is missing.  DEPENDS_ON is a set of the invariants the computation
3746    depends on.  */
3747
3748 static comp_cost
3749 difference_cost (struct ivopts_data *data,
3750                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3751                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3752 {
3753   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3754   unsigned HOST_WIDE_INT off1, off2;
3755   aff_tree aff_e1, aff_e2;
3756   tree type;
3757
3758   e1 = strip_offset (e1, &off1);
3759   e2 = strip_offset (e2, &off2);
3760   *offset += off1 - off2;
3761
3762   STRIP_NOPS (e1);
3763   STRIP_NOPS (e2);
3764
3765   if (TREE_CODE (e1) == ADDR_EXPR)
3766     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3767                                 offset, depends_on);
3768   *symbol_present = false;
3769
3770   if (operand_equal_p (e1, e2, 0))
3771     {
3772       *var_present = false;
3773       return no_cost;
3774     }
3775
3776   *var_present = true;
3777
3778   if (integer_zerop (e2))
3779     return force_var_cost (data, e1, depends_on);
3780
3781   if (integer_zerop (e1))
3782     {
3783       comp_cost cost = force_var_cost (data, e2, depends_on);
3784       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3785       return cost;
3786     }
3787
3788   type = signed_type_for (TREE_TYPE (e1));
3789   tree_to_aff_combination (e1, type, &aff_e1);
3790   tree_to_aff_combination (e2, type, &aff_e2);
3791   aff_combination_scale (&aff_e2, double_int_minus_one);
3792   aff_combination_add (&aff_e1, &aff_e2);
3793
3794   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3795 }
3796
3797 /* Returns true if AFF1 and AFF2 are identical.  */
3798
3799 static bool
3800 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3801 {
3802   unsigned i;
3803
3804   if (aff1->n != aff2->n)
3805     return false;
3806
3807   for (i = 0; i < aff1->n; i++)
3808     {
3809       if (aff1->elts[i].coef != aff2->elts[i].coef)
3810         return false;
3811
3812       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3813         return false;
3814     }
3815   return true;
3816 }
3817
3818 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3819
3820 static int
3821 get_expr_id (struct ivopts_data *data, tree expr)
3822 {
3823   struct iv_inv_expr_ent ent;
3824   struct iv_inv_expr_ent **slot;
3825
3826   ent.expr = expr;
3827   ent.hash = iterative_hash_expr (expr, 0);
3828   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3829   if (*slot)
3830     return (*slot)->id;
3831
3832   *slot = XNEW (struct iv_inv_expr_ent);
3833   (*slot)->expr = expr;
3834   (*slot)->hash = ent.hash;
3835   (*slot)->id = data->inv_expr_id++;
3836   return (*slot)->id;
3837 }
3838
3839 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3840    requires a new compiler generated temporary.  Returns -1 otherwise.
3841    ADDRESS_P is a flag indicating if the expression is for address
3842    computation.  */
3843
3844 static int
3845 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3846                             tree cbase, HOST_WIDE_INT ratio,
3847                             bool address_p)
3848 {
3849   aff_tree ubase_aff, cbase_aff;
3850   tree expr, ub, cb;
3851
3852   STRIP_NOPS (ubase);
3853   STRIP_NOPS (cbase);
3854   ub = ubase;
3855   cb = cbase;
3856
3857   if ((TREE_CODE (ubase) == INTEGER_CST)
3858       && (TREE_CODE (cbase) == INTEGER_CST))
3859     return -1;
3860
3861   /* Strips the constant part. */
3862   if (TREE_CODE (ubase) == PLUS_EXPR
3863       || TREE_CODE (ubase) == MINUS_EXPR
3864       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3865     {
3866       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3867         ubase = TREE_OPERAND (ubase, 0);
3868     }
3869
3870   /* Strips the constant part. */
3871   if (TREE_CODE (cbase) == PLUS_EXPR
3872       || TREE_CODE (cbase) == MINUS_EXPR
3873       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3874     {
3875       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3876         cbase = TREE_OPERAND (cbase, 0);
3877     }
3878
3879   if (address_p)
3880     {
3881       if (((TREE_CODE (ubase) == SSA_NAME)
3882            || (TREE_CODE (ubase) == ADDR_EXPR
3883                && is_gimple_min_invariant (ubase)))
3884           && (TREE_CODE (cbase) == INTEGER_CST))
3885         return -1;
3886
3887       if (((TREE_CODE (cbase) == SSA_NAME)
3888            || (TREE_CODE (cbase) == ADDR_EXPR
3889                && is_gimple_min_invariant (cbase)))
3890           && (TREE_CODE (ubase) == INTEGER_CST))
3891         return -1;
3892     }
3893
3894   if (ratio == 1)
3895     {
3896       if (operand_equal_p (ubase, cbase, 0))
3897         return -1;
3898
3899       if (TREE_CODE (ubase) == ADDR_EXPR
3900           && TREE_CODE (cbase) == ADDR_EXPR)
3901         {
3902           tree usym, csym;
3903
3904           usym = TREE_OPERAND (ubase, 0);
3905           csym = TREE_OPERAND (cbase, 0);
3906           if (TREE_CODE (usym) == ARRAY_REF)
3907             {
3908               tree ind = TREE_OPERAND (usym, 1);
3909               if (TREE_CODE (ind) == INTEGER_CST
3910                   && host_integerp (ind, 0)
3911                   && TREE_INT_CST_LOW (ind) == 0)
3912                 usym = TREE_OPERAND (usym, 0);
3913             }
3914           if (TREE_CODE (csym) == ARRAY_REF)
3915             {
3916               tree ind = TREE_OPERAND (csym, 1);
3917               if (TREE_CODE (ind) == INTEGER_CST
3918                   && host_integerp (ind, 0)
3919                   && TREE_INT_CST_LOW (ind) == 0)
3920                 csym = TREE_OPERAND (csym, 0);
3921             }
3922           if (operand_equal_p (usym, csym, 0))
3923             return -1;
3924         }
3925       /* Now do more complex comparison  */
3926       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3927       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3928       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3929         return -1;
3930     }
3931
3932   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3933   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3934
3935   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3936   aff_combination_add (&ubase_aff, &cbase_aff);
3937   expr = aff_combination_to_tree (&ubase_aff);
3938   return get_expr_id (data, expr);
3939 }
3940
3941
3942
3943 /* Determines the cost of the computation by that USE is expressed
3944    from induction variable CAND.  If ADDRESS_P is true, we just need
3945    to create an address from it, otherwise we want to get it into
3946    register.  A set of invariants we depend on is stored in
3947    DEPENDS_ON.  AT is the statement at that the value is computed.
3948    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3949    addressing is likely.  */
3950
3951 static comp_cost
3952 get_computation_cost_at (struct ivopts_data *data,
3953                          struct iv_use *use, struct iv_cand *cand,
3954                          bool address_p, bitmap *depends_on, gimple at,
3955                          bool *can_autoinc,
3956                          int *inv_expr_id)
3957 {
3958   tree ubase = use->iv->base, ustep = use->iv->step;
3959   tree cbase, cstep;
3960   tree utype = TREE_TYPE (ubase), ctype;
3961   unsigned HOST_WIDE_INT cstepi, offset = 0;
3962   HOST_WIDE_INT ratio, aratio;
3963   bool var_present, symbol_present, stmt_is_after_inc;
3964   comp_cost cost;
3965   double_int rat;
3966   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3967   enum machine_mode mem_mode = (address_p
3968                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3969                                 : VOIDmode);
3970
3971   *depends_on = NULL;
3972
3973   /* Only consider real candidates.  */
3974   if (!cand->iv)
3975     return infinite_cost;
3976
3977   cbase = cand->iv->base;
3978   cstep = cand->iv->step;
3979   ctype = TREE_TYPE (cbase);
3980
3981   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3982     {
3983       /* We do not have a precision to express the values of use.  */
3984       return infinite_cost;
3985     }
3986
3987   if (address_p
3988       || (use->iv->base_object
3989           && cand->iv->base_object
3990           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3991           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3992     {
3993       /* Do not try to express address of an object with computation based
3994          on address of a different object.  This may cause problems in rtl
3995          level alias analysis (that does not expect this to be happening,
3996          as this is illegal in C), and would be unlikely to be useful
3997          anyway.  */
3998       if (use->iv->base_object
3999           && cand->iv->base_object
4000           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4001         return infinite_cost;
4002     }
4003
4004   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4005     {
4006       /* TODO -- add direct handling of this case.  */
4007       goto fallback;
4008     }
4009
4010   /* CSTEPI is removed from the offset in case statement is after the
4011      increment.  If the step is not constant, we use zero instead.
4012      This is a bit imprecise (there is the extra addition), but
4013      redundancy elimination is likely to transform the code so that
4014      it uses value of the variable before increment anyway,
4015      so it is not that much unrealistic.  */
4016   if (cst_and_fits_in_hwi (cstep))
4017     cstepi = int_cst_value (cstep);
4018   else
4019     cstepi = 0;
4020
4021   if (!constant_multiple_of (ustep, cstep, &rat))
4022     return infinite_cost;
4023
4024   if (rat.fits_shwi ())
4025     ratio = rat.to_shwi ();
4026   else
4027     return infinite_cost;
4028
4029   STRIP_NOPS (cbase);
4030   ctype = TREE_TYPE (cbase);
4031
4032   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4033
4034   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4035      or ratio == 1, it is better to handle this like
4036
4037      ubase - ratio * cbase + ratio * var
4038
4039      (also holds in the case ratio == -1, TODO.  */
4040
4041   if (cst_and_fits_in_hwi (cbase))
4042     {
4043       offset = - ratio * int_cst_value (cbase);
4044       cost = difference_cost (data,
4045                               ubase, build_int_cst (utype, 0),
4046                               &symbol_present, &var_present, &offset,
4047                               depends_on);
4048       cost.cost /= avg_loop_niter (data->current_loop);
4049     }
4050   else if (ratio == 1)
4051     {
4052       tree real_cbase = cbase;
4053
4054       /* Check to see if any adjustment is needed.  */
4055       if (cstepi == 0 && stmt_is_after_inc)
4056         {
4057           aff_tree real_cbase_aff;
4058           aff_tree cstep_aff;
4059
4060           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4061                                    &real_cbase_aff);
4062           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4063
4064           aff_combination_add (&real_cbase_aff, &cstep_aff);
4065           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4066         }
4067
4068       cost = difference_cost (data,
4069                               ubase, real_cbase,
4070                               &symbol_present, &var_present, &offset,
4071                               depends_on);
4072       cost.cost /= avg_loop_niter (data->current_loop);
4073     }
4074   else if (address_p
4075            && !POINTER_TYPE_P (ctype)
4076            && multiplier_allowed_in_address_p
4077                 (ratio, mem_mode,
4078                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4079     {
4080       cbase
4081         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4082       cost = difference_cost (data,
4083                               ubase, cbase,
4084                               &symbol_present, &var_present, &offset,
4085                               depends_on);
4086       cost.cost /= avg_loop_niter (data->current_loop);
4087     }
4088   else
4089     {
4090       cost = force_var_cost (data, cbase, depends_on);
4091       cost = add_costs (cost,
4092                         difference_cost (data,
4093                                          ubase, build_int_cst (utype, 0),
4094                                          &symbol_present, &var_present,
4095                                          &offset, depends_on));
4096       cost.cost /= avg_loop_niter (data->current_loop);
4097       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4098     }
4099
4100   if (inv_expr_id)
4101     {
4102       *inv_expr_id =
4103           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4104       /* Clear depends on.  */
4105       if (*inv_expr_id != -1 && depends_on && *depends_on)
4106         bitmap_clear (*depends_on);
4107     }
4108
4109   /* If we are after the increment, the value of the candidate is higher by
4110      one iteration.  */
4111   if (stmt_is_after_inc)
4112     offset -= ratio * cstepi;
4113
4114   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4115      (symbol/var1/const parts may be omitted).  If we are looking for an
4116      address, find the cost of addressing this.  */
4117   if (address_p)
4118     return add_costs (cost,
4119                       get_address_cost (symbol_present, var_present,
4120                                         offset, ratio, cstepi,
4121                                         mem_mode,
4122                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4123                                         speed, stmt_is_after_inc,
4124                                         can_autoinc));
4125
4126   /* Otherwise estimate the costs for computing the expression.  */
4127   if (!symbol_present && !var_present && !offset)
4128     {
4129       if (ratio != 1)
4130         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4131       return cost;
4132     }
4133
4134   /* Symbol + offset should be compile-time computable so consider that they
4135       are added once to the variable, if present.  */
4136   if (var_present && (symbol_present || offset))
4137     cost.cost += adjust_setup_cost (data,
4138                                     add_cost (speed, TYPE_MODE (ctype)));
4139
4140   /* Having offset does not affect runtime cost in case it is added to
4141      symbol, but it increases complexity.  */
4142   if (offset)
4143     cost.complexity++;
4144
4145   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4146
4147   aratio = ratio > 0 ? ratio : -ratio;
4148   if (aratio != 1)
4149     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4150   return cost;
4151
4152 fallback:
4153   if (can_autoinc)
4154     *can_autoinc = false;
4155
4156   {
4157     /* Just get the expression, expand it and measure the cost.  */
4158     tree comp = get_computation_at (data->current_loop, use, cand, at);
4159
4160     if (!comp)
4161       return infinite_cost;
4162
4163     if (address_p)
4164       comp = build_simple_mem_ref (comp);
4165
4166     return new_cost (computation_cost (comp, speed), 0);
4167   }
4168 }
4169
4170 /* Determines the cost of the computation by that USE is expressed
4171    from induction variable CAND.  If ADDRESS_P is true, we just need
4172    to create an address from it, otherwise we want to get it into
4173    register.  A set of invariants we depend on is stored in
4174    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4175    autoinc addressing is likely.  */
4176
4177 static comp_cost
4178 get_computation_cost (struct ivopts_data *data,
4179                       struct iv_use *use, struct iv_cand *cand,
4180                       bool address_p, bitmap *depends_on,
4181                       bool *can_autoinc, int *inv_expr_id)
4182 {
4183   return get_computation_cost_at (data,
4184                                   use, cand, address_p, depends_on, use->stmt,
4185                                   can_autoinc, inv_expr_id);
4186 }
4187
4188 /* Determines cost of basing replacement of USE on CAND in a generic
4189    expression.  */
4190
4191 static bool
4192 determine_use_iv_cost_generic (struct ivopts_data *data,
4193                                struct iv_use *use, struct iv_cand *cand)
4194 {
4195   bitmap depends_on;
4196   comp_cost cost;
4197   int inv_expr_id = -1;
4198
4199   /* The simple case first -- if we need to express value of the preserved
4200      original biv, the cost is 0.  This also prevents us from counting the
4201      cost of increment twice -- once at this use and once in the cost of
4202      the candidate.  */
4203   if (cand->pos == IP_ORIGINAL
4204       && cand->incremented_at == use->stmt)
4205     {
4206       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4207                        ERROR_MARK, -1);
4208       return true;
4209     }
4210
4211   cost = get_computation_cost (data, use, cand, false, &depends_on,
4212                                NULL, &inv_expr_id);
4213
4214   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4215                    inv_expr_id);
4216
4217   return !infinite_cost_p (cost);
4218 }
4219
4220 /* Determines cost of basing replacement of USE on CAND in an address.  */
4221
4222 static bool
4223 determine_use_iv_cost_address (struct ivopts_data *data,
4224                                struct iv_use *use, struct iv_cand *cand)
4225 {
4226   bitmap depends_on;
4227   bool can_autoinc;
4228   int inv_expr_id = -1;
4229   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4230                                          &can_autoinc, &inv_expr_id);
4231
4232   if (cand->ainc_use == use)
4233     {
4234       if (can_autoinc)
4235         cost.cost -= cand->cost_step;
4236       /* If we generated the candidate solely for exploiting autoincrement
4237          opportunities, and it turns out it can't be used, set the cost to
4238          infinity to make sure we ignore it.  */
4239       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4240         cost = infinite_cost;
4241     }
4242   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4243                    inv_expr_id);
4244
4245   return !infinite_cost_p (cost);
4246 }
4247
4248 /* Computes value of candidate CAND at position AT in iteration NITER, and
4249    stores it to VAL.  */
4250
4251 static void
4252 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4253                aff_tree *val)
4254 {
4255   aff_tree step, delta, nit;
4256   struct iv *iv = cand->iv;
4257   tree type = TREE_TYPE (iv->base);
4258   tree steptype = type;
4259   if (POINTER_TYPE_P (type))
4260     steptype = sizetype;
4261
4262   tree_to_aff_combination (iv->step, steptype, &step);
4263   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4264   aff_combination_convert (&nit, steptype);
4265   aff_combination_mult (&nit, &step, &delta);
4266   if (stmt_after_increment (loop, cand, at))
4267     aff_combination_add (&delta, &step);
4268
4269   tree_to_aff_combination (iv->base, type, val);
4270   aff_combination_add (val, &delta);
4271 }
4272
4273 /* Returns period of induction variable iv.  */
4274
4275 static tree
4276 iv_period (struct iv *iv)
4277 {
4278   tree step = iv->step, period, type;
4279   tree pow2div;
4280
4281   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4282
4283   type = unsigned_type_for (TREE_TYPE (step));
4284   /* Period of the iv is lcm (step, type_range)/step -1,
4285      i.e., N*type_range/step - 1. Since type range is power
4286      of two, N == (step >> num_of_ending_zeros_binary (step),
4287      so the final result is
4288
4289        (type_range >> num_of_ending_zeros_binary (step)) - 1
4290
4291   */
4292   pow2div = num_ending_zeros (step);
4293
4294   period = build_low_bits_mask (type,
4295                                 (TYPE_PRECISION (type)
4296                                  - tree_low_cst (pow2div, 1)));
4297
4298   return period;
4299 }
4300
4301 /* Returns the comparison operator used when eliminating the iv USE.  */
4302
4303 static enum tree_code
4304 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4305 {
4306   struct loop *loop = data->current_loop;
4307   basic_block ex_bb;
4308   edge exit;
4309
4310   ex_bb = gimple_bb (use->stmt);
4311   exit = EDGE_SUCC (ex_bb, 0);
4312   if (flow_bb_inside_loop_p (loop, exit->dest))
4313     exit = EDGE_SUCC (ex_bb, 1);
4314
4315   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4316 }
4317
4318 static tree
4319 strip_wrap_conserving_type_conversions (tree exp)
4320 {
4321   while (tree_ssa_useless_type_conversion (exp)
4322          && (nowrap_type_p (TREE_TYPE (exp))
4323              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4324     exp = TREE_OPERAND (exp, 0);
4325   return exp;
4326 }
4327
4328 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4329    check for an exact match.  */
4330
4331 static bool
4332 expr_equal_p (tree e, tree what)
4333 {
4334   gimple stmt;
4335   enum tree_code code;
4336
4337   e = strip_wrap_conserving_type_conversions (e);
4338   what = strip_wrap_conserving_type_conversions (what);
4339
4340   code = TREE_CODE (what);
4341   if (TREE_TYPE (e) != TREE_TYPE (what))
4342     return false;
4343
4344   if (operand_equal_p (e, what, 0))
4345     return true;
4346
4347   if (TREE_CODE (e) != SSA_NAME)
4348     return false;
4349
4350   stmt = SSA_NAME_DEF_STMT (e);
4351   if (gimple_code (stmt) != GIMPLE_ASSIGN
4352       || gimple_assign_rhs_code (stmt) != code)
4353     return false;
4354
4355   switch (get_gimple_rhs_class (code))
4356     {
4357     case GIMPLE_BINARY_RHS:
4358       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4359         return false;
4360       /* Fallthru.  */
4361
4362     case GIMPLE_UNARY_RHS:
4363     case GIMPLE_SINGLE_RHS:
4364       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4365     default:
4366       return false;
4367     }
4368 }
4369
4370 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4371    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4372    calculation is performed in non-wrapping type.
4373
4374    TODO: More generally, we could test for the situation that
4375          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4376          This would require knowing the sign of OFFSET.
4377
4378          Also, we only look for the first addition in the computation of BASE.
4379          More complex analysis would be better, but introducing it just for
4380          this optimization seems like an overkill.  */
4381
4382 static bool
4383 difference_cannot_overflow_p (tree base, tree offset)
4384 {
4385   enum tree_code code;
4386   tree e1, e2;
4387
4388   if (!nowrap_type_p (TREE_TYPE (base)))
4389     return false;
4390
4391   base = expand_simple_operations (base);
4392
4393   if (TREE_CODE (base) == SSA_NAME)
4394     {
4395       gimple stmt = SSA_NAME_DEF_STMT (base);
4396
4397       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4398         return false;
4399
4400       code = gimple_assign_rhs_code (stmt);
4401       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4402         return false;
4403
4404       e1 = gimple_assign_rhs1 (stmt);
4405       e2 = gimple_assign_rhs2 (stmt);
4406     }
4407   else
4408     {
4409       code = TREE_CODE (base);
4410       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4411         return false;
4412       e1 = TREE_OPERAND (base, 0);
4413       e2 = TREE_OPERAND (base, 1);
4414     }
4415
4416   /* TODO: deeper inspection may be necessary to prove the equality.  */
4417   switch (code)
4418     {
4419     case PLUS_EXPR:
4420       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4421     case POINTER_PLUS_EXPR:
4422       return expr_equal_p (e2, offset);
4423
4424     default:
4425       return false;
4426     }
4427 }
4428
4429 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4430    comparison with CAND.  NITER describes the number of iterations of
4431    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4432
4433    We aim to handle the following situation:
4434
4435    sometype *base, *p;
4436    int a, b, i;
4437
4438    i = a;
4439    p = p_0 = base + a;
4440
4441    do
4442      {
4443        bla (*p);
4444        p++;
4445        i++;
4446      }
4447    while (i < b);
4448
4449    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4450    We aim to optimize this to
4451
4452    p = p_0 = base + a;
4453    do
4454      {
4455        bla (*p);
4456        p++;
4457      }
4458    while (p < p_0 - a + b);
4459
4460    This preserves the correctness, since the pointer arithmetics does not
4461    overflow.  More precisely:
4462
4463    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4464       overflow in computing it or the values of p.
4465    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4466       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4467
4468 static bool
4469 iv_elimination_compare_lt (struct ivopts_data *data,
4470                            struct iv_cand *cand, enum tree_code *comp_p,
4471                            struct tree_niter_desc *niter)
4472 {
4473   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4474   struct affine_tree_combination nit, tmpa, tmpb;
4475   enum tree_code comp;
4476   HOST_WIDE_INT step;
4477
4478   /* We need to know that the candidate induction variable does not overflow.
4479      While more complex analysis may be used to prove this, for now just
4480      check that the variable appears in the original program and that it
4481      is computed in a type that guarantees no overflows.  */
4482   cand_type = TREE_TYPE (cand->iv->base);
4483   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4484     return false;
4485
4486   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4487      the calculation of the BOUND could overflow, making the comparison
4488      invalid.  */
4489   if (!data->loop_single_exit_p)
4490     return false;
4491
4492   /* We need to be able to decide whether candidate is increasing or decreasing
4493      in order to choose the right comparison operator.  */
4494   if (!cst_and_fits_in_hwi (cand->iv->step))
4495     return false;
4496   step = int_cst_value (cand->iv->step);
4497
4498   /* Check that the number of iterations matches the expected pattern:
4499      a + 1 > b ? 0 : b - a - 1.  */
4500   mbz = niter->may_be_zero;
4501   if (TREE_CODE (mbz) == GT_EXPR)
4502     {
4503       /* Handle a + 1 > b.  */
4504       tree op0 = TREE_OPERAND (mbz, 0);
4505       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4506         {
4507           a = TREE_OPERAND (op0, 0);
4508           b = TREE_OPERAND (mbz, 1);
4509         }
4510       else
4511         return false;
4512     }
4513   else if (TREE_CODE (mbz) == LT_EXPR)
4514     {
4515       tree op1 = TREE_OPERAND (mbz, 1);
4516
4517       /* Handle b < a + 1.  */
4518       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4519         {
4520           a = TREE_OPERAND (op1, 0);
4521           b = TREE_OPERAND (mbz, 0);
4522         }
4523       else
4524         return false;
4525     }
4526   else
4527     return false;
4528
4529   /* Expected number of iterations is B - A - 1.  Check that it matches
4530      the actual number, i.e., that B - A - NITER = 1.  */
4531   tree_to_aff_combination (niter->niter, nit_type, &nit);
4532   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4533   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4534   aff_combination_scale (&nit, double_int_minus_one);
4535   aff_combination_scale (&tmpa, double_int_minus_one);
4536   aff_combination_add (&tmpb, &tmpa);
4537   aff_combination_add (&tmpb, &nit);
4538   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4539     return false;
4540
4541   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4542      overflow.  */
4543   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4544                         cand->iv->step,
4545                         fold_convert (TREE_TYPE (cand->iv->step), a));
4546   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4547     return false;
4548
4549   /* Determine the new comparison operator.  */
4550   comp = step < 0 ? GT_EXPR : LT_EXPR;
4551   if (*comp_p == NE_EXPR)
4552     *comp_p = comp;
4553   else if (*comp_p == EQ_EXPR)
4554     *comp_p = invert_tree_comparison (comp, false);
4555   else
4556     gcc_unreachable ();
4557
4558   return true;
4559 }
4560
4561 /* Check whether it is possible to express the condition in USE by comparison
4562    of candidate CAND.  If so, store the value compared with to BOUND, and the
4563    comparison operator to COMP.  */
4564
4565 static bool
4566 may_eliminate_iv (struct ivopts_data *data,
4567                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4568                   enum tree_code *comp)
4569 {
4570   basic_block ex_bb;
4571   edge exit;
4572   tree period;
4573   struct loop *loop = data->current_loop;
4574   aff_tree bnd;
4575   struct tree_niter_desc *desc = NULL;
4576
4577   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4578     return false;
4579
4580   /* For now works only for exits that dominate the loop latch.
4581      TODO: extend to other conditions inside loop body.  */
4582   ex_bb = gimple_bb (use->stmt);
4583   if (use->stmt != last_stmt (ex_bb)
4584       || gimple_code (use->stmt) != GIMPLE_COND
4585       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4586     return false;
4587
4588   exit = EDGE_SUCC (ex_bb, 0);
4589   if (flow_bb_inside_loop_p (loop, exit->dest))
4590     exit = EDGE_SUCC (ex_bb, 1);
4591   if (flow_bb_inside_loop_p (loop, exit->dest))
4592     return false;
4593
4594   desc = niter_for_exit (data, exit);
4595   if (!desc)
4596     return false;
4597
4598   /* Determine whether we can use the variable to test the exit condition.
4599      This is the case iff the period of the induction variable is greater
4600      than the number of iterations for which the exit condition is true.  */
4601   period = iv_period (cand->iv);
4602
4603   /* If the number of iterations is constant, compare against it directly.  */
4604   if (TREE_CODE (desc->niter) == INTEGER_CST)
4605     {
4606       /* See cand_value_at.  */
4607       if (stmt_after_increment (loop, cand, use->stmt))
4608         {
4609           if (!tree_int_cst_lt (desc->niter, period))
4610             return false;
4611         }
4612       else
4613         {
4614           if (tree_int_cst_lt (period, desc->niter))
4615             return false;
4616         }
4617     }
4618
4619   /* If not, and if this is the only possible exit of the loop, see whether
4620      we can get a conservative estimate on the number of iterations of the
4621      entire loop and compare against that instead.  */
4622   else
4623     {
4624       double_int period_value, max_niter;
4625
4626       max_niter = desc->max;
4627       if (stmt_after_increment (loop, cand, use->stmt))
4628         max_niter += double_int_one;
4629       period_value = tree_to_double_int (period);
4630       if (max_niter.ugt (period_value))
4631         {
4632           /* See if we can take advantage of inferred loop bound information.  */
4633           if (data->loop_single_exit_p)
4634             {
4635               if (!max_loop_iterations (loop, &max_niter))
4636                 return false;
4637               /* The loop bound is already adjusted by adding 1.  */
4638               if (max_niter.ugt (period_value))
4639                 return false;
4640             }
4641           else
4642             return false;
4643         }
4644     }
4645
4646   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4647
4648   *bound = aff_combination_to_tree (&bnd);
4649   *comp = iv_elimination_compare (data, use);
4650
4651   /* It is unlikely that computing the number of iterations using division
4652      would be more profitable than keeping the original induction variable.  */
4653   if (expression_expensive_p (*bound))
4654     return false;
4655
4656   /* Sometimes, it is possible to handle the situation that the number of
4657      iterations may be zero unless additional assumtions by using <
4658      instead of != in the exit condition.
4659
4660      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4661            base the exit condition on it.  However, that is often too
4662            expensive.  */
4663   if (!integer_zerop (desc->may_be_zero))
4664     return iv_elimination_compare_lt (data, cand, comp, desc);
4665
4666   return true;
4667 }
4668
4669  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4670     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4671
4672 static int
4673 parm_decl_cost (struct ivopts_data *data, tree bound)
4674 {
4675   tree sbound = bound;
4676   STRIP_NOPS (sbound);
4677
4678   if (TREE_CODE (sbound) == SSA_NAME
4679       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4680       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4681       && data->body_includes_call)
4682     return COSTS_N_INSNS (1);
4683
4684   return 0;
4685 }
4686
4687 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4688
4689 static bool
4690 determine_use_iv_cost_condition (struct ivopts_data *data,
4691                                  struct iv_use *use, struct iv_cand *cand)
4692 {
4693   tree bound = NULL_TREE;
4694   struct iv *cmp_iv;
4695   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4696   comp_cost elim_cost, express_cost, cost, bound_cost;
4697   bool ok;
4698   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4699   tree *control_var, *bound_cst;
4700   enum tree_code comp = ERROR_MARK;
4701
4702   /* Only consider real candidates.  */
4703   if (!cand->iv)
4704     {
4705       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4706                        ERROR_MARK, -1);
4707       return false;
4708     }
4709
4710   /* Try iv elimination.  */
4711   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4712     {
4713       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4714       if (elim_cost.cost == 0)
4715         elim_cost.cost = parm_decl_cost (data, bound);
4716       else if (TREE_CODE (bound) == INTEGER_CST)
4717         elim_cost.cost = 0;
4718       /* If we replace a loop condition 'i < n' with 'p < base + n',
4719          depends_on_elim will have 'base' and 'n' set, which implies
4720          that both 'base' and 'n' will be live during the loop.  More likely,
4721          'base + n' will be loop invariant, resulting in only one live value
4722          during the loop.  So in that case we clear depends_on_elim and set
4723         elim_inv_expr_id instead.  */
4724       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4725         {
4726           elim_inv_expr_id = get_expr_id (data, bound);
4727           bitmap_clear (depends_on_elim);
4728         }
4729       /* The bound is a loop invariant, so it will be only computed
4730          once.  */
4731       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4732     }
4733   else
4734     elim_cost = infinite_cost;
4735
4736   /* Try expressing the original giv.  If it is compared with an invariant,
4737      note that we cannot get rid of it.  */
4738   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4739                               NULL, &cmp_iv);
4740   gcc_assert (ok);
4741
4742   /* When the condition is a comparison of the candidate IV against
4743      zero, prefer this IV.
4744
4745      TODO: The constant that we're subtracting from the cost should
4746      be target-dependent.  This information should be added to the
4747      target costs for each backend.  */
4748   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4749       && integer_zerop (*bound_cst)
4750       && (operand_equal_p (*control_var, cand->var_after, 0)
4751           || operand_equal_p (*control_var, cand->var_before, 0)))
4752     elim_cost.cost -= 1;
4753
4754   express_cost = get_computation_cost (data, use, cand, false,
4755                                        &depends_on_express, NULL,
4756                                        &express_inv_expr_id);
4757   fd_ivopts_data = data;
4758   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4759
4760   /* Count the cost of the original bound as well.  */
4761   bound_cost = force_var_cost (data, *bound_cst, NULL);
4762   if (bound_cost.cost == 0)
4763     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4764   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4765     bound_cost.cost = 0;
4766   express_cost.cost += bound_cost.cost;
4767
4768   /* Choose the better approach, preferring the eliminated IV. */
4769   if (compare_costs (elim_cost, express_cost) <= 0)
4770     {
4771       cost = elim_cost;
4772       depends_on = depends_on_elim;
4773       depends_on_elim = NULL;
4774       inv_expr_id = elim_inv_expr_id;
4775     }
4776   else
4777     {
4778       cost = express_cost;
4779       depends_on = depends_on_express;
4780       depends_on_express = NULL;
4781       bound = NULL_TREE;
4782       comp = ERROR_MARK;
4783       inv_expr_id = express_inv_expr_id;
4784     }
4785
4786   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4787
4788   if (depends_on_elim)
4789     BITMAP_FREE (depends_on_elim);
4790   if (depends_on_express)
4791     BITMAP_FREE (depends_on_express);
4792
4793   return !infinite_cost_p (cost);
4794 }
4795
4796 /* Determines cost of basing replacement of USE on CAND.  Returns false
4797    if USE cannot be based on CAND.  */
4798
4799 static bool
4800 determine_use_iv_cost (struct ivopts_data *data,
4801                        struct iv_use *use, struct iv_cand *cand)
4802 {
4803   switch (use->type)
4804     {
4805     case USE_NONLINEAR_EXPR:
4806       return determine_use_iv_cost_generic (data, use, cand);
4807
4808     case USE_ADDRESS:
4809       return determine_use_iv_cost_address (data, use, cand);
4810
4811     case USE_COMPARE:
4812       return determine_use_iv_cost_condition (data, use, cand);
4813
4814     default:
4815       gcc_unreachable ();
4816     }
4817 }
4818
4819 /* Return true if get_computation_cost indicates that autoincrement is
4820    a possibility for the pair of USE and CAND, false otherwise.  */
4821
4822 static bool
4823 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4824                            struct iv_cand *cand)
4825 {
4826   bitmap depends_on;
4827   bool can_autoinc;
4828   comp_cost cost;
4829
4830   if (use->type != USE_ADDRESS)
4831     return false;
4832
4833   cost = get_computation_cost (data, use, cand, true, &depends_on,
4834                                &can_autoinc, NULL);
4835
4836   BITMAP_FREE (depends_on);
4837
4838   return !infinite_cost_p (cost) && can_autoinc;
4839 }
4840
4841 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4842    use that allows autoincrement, and set their AINC_USE if possible.  */
4843
4844 static void
4845 set_autoinc_for_original_candidates (struct ivopts_data *data)
4846 {
4847   unsigned i, j;
4848
4849   for (i = 0; i < n_iv_cands (data); i++)
4850     {
4851       struct iv_cand *cand = iv_cand (data, i);
4852       struct iv_use *closest_before = NULL;
4853       struct iv_use *closest_after = NULL;
4854       if (cand->pos != IP_ORIGINAL)
4855         continue;
4856
4857       for (j = 0; j < n_iv_uses (data); j++)
4858         {
4859           struct iv_use *use = iv_use (data, j);
4860           unsigned uid = gimple_uid (use->stmt);
4861
4862           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4863             continue;
4864
4865           if (uid < gimple_uid (cand->incremented_at)
4866               && (closest_before == NULL
4867                   || uid > gimple_uid (closest_before->stmt)))
4868             closest_before = use;
4869
4870           if (uid > gimple_uid (cand->incremented_at)
4871               && (closest_after == NULL
4872                   || uid < gimple_uid (closest_after->stmt)))
4873             closest_after = use;
4874         }
4875
4876       if (closest_before != NULL
4877           && autoinc_possible_for_pair (data, closest_before, cand))
4878         cand->ainc_use = closest_before;
4879       else if (closest_after != NULL
4880                && autoinc_possible_for_pair (data, closest_after, cand))
4881         cand->ainc_use = closest_after;
4882     }
4883 }
4884
4885 /* Finds the candidates for the induction variables.  */
4886
4887 static void
4888 find_iv_candidates (struct ivopts_data *data)
4889 {
4890   /* Add commonly used ivs.  */
4891   add_standard_iv_candidates (data);
4892
4893   /* Add old induction variables.  */
4894   add_old_ivs_candidates (data);
4895
4896   /* Add induction variables derived from uses.  */
4897   add_derived_ivs_candidates (data);
4898
4899   set_autoinc_for_original_candidates (data);
4900
4901   /* Record the important candidates.  */
4902   record_important_candidates (data);
4903 }
4904
4905 /* Determines costs of basing the use of the iv on an iv candidate.  */
4906
4907 static void
4908 determine_use_iv_costs (struct ivopts_data *data)
4909 {
4910   unsigned i, j;
4911   struct iv_use *use;
4912   struct iv_cand *cand;
4913   bitmap to_clear = BITMAP_ALLOC (NULL);
4914
4915   alloc_use_cost_map (data);
4916
4917   for (i = 0; i < n_iv_uses (data); i++)
4918     {
4919       use = iv_use (data, i);
4920
4921       if (data->consider_all_candidates)
4922         {
4923           for (j = 0; j < n_iv_cands (data); j++)
4924             {
4925               cand = iv_cand (data, j);
4926               determine_use_iv_cost (data, use, cand);
4927             }
4928         }
4929       else
4930         {
4931           bitmap_iterator bi;
4932
4933           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4934             {
4935               cand = iv_cand (data, j);
4936               if (!determine_use_iv_cost (data, use, cand))
4937                 bitmap_set_bit (to_clear, j);
4938             }
4939
4940           /* Remove the candidates for that the cost is infinite from
4941              the list of related candidates.  */
4942           bitmap_and_compl_into (use->related_cands, to_clear);
4943           bitmap_clear (to_clear);
4944         }
4945     }
4946
4947   BITMAP_FREE (to_clear);
4948
4949   if (dump_file && (dump_flags & TDF_DETAILS))
4950     {
4951       fprintf (dump_file, "Use-candidate costs:\n");
4952
4953       for (i = 0; i < n_iv_uses (data); i++)
4954         {
4955           use = iv_use (data, i);
4956
4957           fprintf (dump_file, "Use %d:\n", i);
4958           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4959           for (j = 0; j < use->n_map_members; j++)
4960             {
4961               if (!use->cost_map[j].cand
4962                   || infinite_cost_p (use->cost_map[j].cost))
4963                 continue;
4964
4965               fprintf (dump_file, "  %d\t%d\t%d\t",
4966                        use->cost_map[j].cand->id,
4967                        use->cost_map[j].cost.cost,
4968                        use->cost_map[j].cost.complexity);
4969               if (use->cost_map[j].depends_on)
4970                 bitmap_print (dump_file,
4971                               use->cost_map[j].depends_on, "","");
4972               if (use->cost_map[j].inv_expr_id != -1)
4973                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4974               fprintf (dump_file, "\n");
4975             }
4976
4977           fprintf (dump_file, "\n");
4978         }
4979       fprintf (dump_file, "\n");
4980     }
4981 }
4982
4983 /* Determines cost of the candidate CAND.  */
4984
4985 static void
4986 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4987 {
4988   comp_cost cost_base;
4989   unsigned cost, cost_step;
4990   tree base;
4991
4992   if (!cand->iv)
4993     {
4994       cand->cost = 0;
4995       return;
4996     }
4997
4998   /* There are two costs associated with the candidate -- its increment
4999      and its initialization.  The second is almost negligible for any loop
5000      that rolls enough, so we take it just very little into account.  */
5001
5002   base = cand->iv->base;
5003   cost_base = force_var_cost (data, base, NULL);
5004   /* It will be exceptional that the iv register happens to be initialized with
5005      the proper value at no cost.  In general, there will at least be a regcopy
5006      or a const set.  */
5007   if (cost_base.cost == 0)
5008     cost_base.cost = COSTS_N_INSNS (1);
5009   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5010
5011   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5012
5013   /* Prefer the original ivs unless we may gain something by replacing it.
5014      The reason is to make debugging simpler; so this is not relevant for
5015      artificial ivs created by other optimization passes.  */
5016   if (cand->pos != IP_ORIGINAL
5017       || !SSA_NAME_VAR (cand->var_before)
5018       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5019     cost++;
5020
5021   /* Prefer not to insert statements into latch unless there are some
5022      already (so that we do not create unnecessary jumps).  */
5023   if (cand->pos == IP_END
5024       && empty_block_p (ip_end_pos (data->current_loop)))
5025     cost++;
5026
5027   cand->cost = cost;
5028   cand->cost_step = cost_step;
5029 }
5030
5031 /* Determines costs of computation of the candidates.  */
5032
5033 static void
5034 determine_iv_costs (struct ivopts_data *data)
5035 {
5036   unsigned i;
5037
5038   if (dump_file && (dump_flags & TDF_DETAILS))
5039     {
5040       fprintf (dump_file, "Candidate costs:\n");
5041       fprintf (dump_file, "  cand\tcost\n");
5042     }
5043
5044   for (i = 0; i < n_iv_cands (data); i++)
5045     {
5046       struct iv_cand *cand = iv_cand (data, i);
5047
5048       determine_iv_cost (data, cand);
5049
5050       if (dump_file && (dump_flags & TDF_DETAILS))
5051         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5052     }
5053
5054   if (dump_file && (dump_flags & TDF_DETAILS))
5055     fprintf (dump_file, "\n");
5056 }
5057
5058 /* Calculates cost for having SIZE induction variables.  */
5059
5060 static unsigned
5061 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5062 {
5063   /* We add size to the cost, so that we prefer eliminating ivs
5064      if possible.  */
5065   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5066                                             data->body_includes_call);
5067 }
5068
5069 /* For each size of the induction variable set determine the penalty.  */
5070
5071 static void
5072 determine_set_costs (struct ivopts_data *data)
5073 {
5074   unsigned j, n;
5075   gimple phi;
5076   gimple_stmt_iterator psi;
5077   tree op;
5078   struct loop *loop = data->current_loop;
5079   bitmap_iterator bi;
5080
5081   if (dump_file && (dump_flags & TDF_DETAILS))
5082     {
5083       fprintf (dump_file, "Global costs:\n");
5084       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5085       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5086       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5087       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5088     }
5089
5090   n = 0;
5091   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5092     {
5093       phi = gsi_stmt (psi);
5094       op = PHI_RESULT (phi);
5095
5096       if (virtual_operand_p (op))
5097         continue;
5098
5099       if (get_iv (data, op))
5100         continue;
5101
5102       n++;
5103     }
5104
5105   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5106     {
5107       struct version_info *info = ver_info (data, j);
5108
5109       if (info->inv_id && info->has_nonlin_use)
5110         n++;
5111     }
5112
5113   data->regs_used = n;
5114   if (dump_file && (dump_flags & TDF_DETAILS))
5115     fprintf (dump_file, "  regs_used %d\n", n);
5116
5117   if (dump_file && (dump_flags & TDF_DETAILS))
5118     {
5119       fprintf (dump_file, "  cost for size:\n");
5120       fprintf (dump_file, "  ivs\tcost\n");
5121       for (j = 0; j <= 2 * target_avail_regs; j++)
5122         fprintf (dump_file, "  %d\t%d\n", j,
5123                  ivopts_global_cost_for_size (data, j));
5124       fprintf (dump_file, "\n");
5125     }
5126 }
5127
5128 /* Returns true if A is a cheaper cost pair than B.  */
5129
5130 static bool
5131 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5132 {
5133   int cmp;
5134
5135   if (!a)
5136     return false;
5137
5138   if (!b)
5139     return true;
5140
5141   cmp = compare_costs (a->cost, b->cost);
5142   if (cmp < 0)
5143     return true;
5144
5145   if (cmp > 0)
5146     return false;
5147
5148   /* In case the costs are the same, prefer the cheaper candidate.  */
5149   if (a->cand->cost < b->cand->cost)
5150     return true;
5151
5152   return false;
5153 }
5154
5155
5156 /* Returns candidate by that USE is expressed in IVS.  */
5157
5158 static struct cost_pair *
5159 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5160 {
5161   return ivs->cand_for_use[use->id];
5162 }
5163
5164 /* Computes the cost field of IVS structure.  */
5165
5166 static void
5167 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5168 {
5169   comp_cost cost = ivs->cand_use_cost;
5170
5171   cost.cost += ivs->cand_cost;
5172
5173   cost.cost += ivopts_global_cost_for_size (data,
5174                                             ivs->n_regs + ivs->num_used_inv_expr);
5175
5176   ivs->cost = cost;
5177 }
5178
5179 /* Remove invariants in set INVS to set IVS.  */
5180
5181 static void
5182 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5183 {
5184   bitmap_iterator bi;
5185   unsigned iid;
5186
5187   if (!invs)
5188     return;
5189
5190   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5191     {
5192       ivs->n_invariant_uses[iid]--;
5193       if (ivs->n_invariant_uses[iid] == 0)
5194         ivs->n_regs--;
5195     }
5196 }
5197
5198 /* Set USE not to be expressed by any candidate in IVS.  */
5199
5200 static void
5201 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5202                  struct iv_use *use)
5203 {
5204   unsigned uid = use->id, cid;
5205   struct cost_pair *cp;
5206
5207   cp = ivs->cand_for_use[uid];
5208   if (!cp)
5209     return;
5210   cid = cp->cand->id;
5211
5212   ivs->bad_uses++;
5213   ivs->cand_for_use[uid] = NULL;
5214   ivs->n_cand_uses[cid]--;
5215
5216   if (ivs->n_cand_uses[cid] == 0)
5217     {
5218       bitmap_clear_bit (ivs->cands, cid);
5219       /* Do not count the pseudocandidates.  */
5220       if (cp->cand->iv)
5221         ivs->n_regs--;
5222       ivs->n_cands--;
5223       ivs->cand_cost -= cp->cand->cost;
5224
5225       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5226     }
5227
5228   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5229
5230   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5231
5232   if (cp->inv_expr_id != -1)
5233     {
5234       ivs->used_inv_expr[cp->inv_expr_id]--;
5235       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5236         ivs->num_used_inv_expr--;
5237     }
5238   iv_ca_recount_cost (data, ivs);
5239 }
5240
5241 /* Add invariants in set INVS to set IVS.  */
5242
5243 static void
5244 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5245 {
5246   bitmap_iterator bi;
5247   unsigned iid;
5248
5249   if (!invs)
5250     return;
5251
5252   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5253     {
5254       ivs->n_invariant_uses[iid]++;
5255       if (ivs->n_invariant_uses[iid] == 1)
5256         ivs->n_regs++;
5257     }
5258 }
5259
5260 /* Set cost pair for USE in set IVS to CP.  */
5261
5262 static void
5263 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5264               struct iv_use *use, struct cost_pair *cp)
5265 {
5266   unsigned uid = use->id, cid;
5267
5268   if (ivs->cand_for_use[uid] == cp)
5269     return;
5270
5271   if (ivs->cand_for_use[uid])
5272     iv_ca_set_no_cp (data, ivs, use);
5273
5274   if (cp)
5275     {
5276       cid = cp->cand->id;
5277
5278       ivs->bad_uses--;
5279       ivs->cand_for_use[uid] = cp;
5280       ivs->n_cand_uses[cid]++;
5281       if (ivs->n_cand_uses[cid] == 1)
5282         {
5283           bitmap_set_bit (ivs->cands, cid);
5284           /* Do not count the pseudocandidates.  */
5285           if (cp->cand->iv)
5286             ivs->n_regs++;
5287           ivs->n_cands++;
5288           ivs->cand_cost += cp->cand->cost;
5289
5290           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5291         }
5292
5293       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5294       iv_ca_set_add_invariants (ivs, cp->depends_on);
5295
5296       if (cp->inv_expr_id != -1)
5297         {
5298           ivs->used_inv_expr[cp->inv_expr_id]++;
5299           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5300             ivs->num_used_inv_expr++;
5301         }
5302       iv_ca_recount_cost (data, ivs);
5303     }
5304 }
5305
5306 /* Extend set IVS by expressing USE by some of the candidates in it
5307    if possible. All important candidates will be considered
5308    if IMPORTANT_CANDIDATES is true.  */
5309
5310 static void
5311 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5312                struct iv_use *use, bool important_candidates)
5313 {
5314   struct cost_pair *best_cp = NULL, *cp;
5315   bitmap_iterator bi;
5316   bitmap cands;
5317   unsigned i;
5318
5319   gcc_assert (ivs->upto >= use->id);
5320
5321   if (ivs->upto == use->id)
5322     {
5323       ivs->upto++;
5324       ivs->bad_uses++;
5325     }
5326
5327   cands = (important_candidates ? data->important_candidates : ivs->cands);
5328   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5329     {
5330       struct iv_cand *cand = iv_cand (data, i);
5331
5332       cp = get_use_iv_cost (data, use, cand);
5333
5334       if (cheaper_cost_pair (cp, best_cp))
5335         best_cp = cp;
5336     }
5337
5338   iv_ca_set_cp (data, ivs, use, best_cp);
5339 }
5340
5341 /* Get cost for assignment IVS.  */
5342
5343 static comp_cost
5344 iv_ca_cost (struct iv_ca *ivs)
5345 {
5346   /* This was a conditional expression but it triggered a bug in
5347      Sun C 5.5.  */
5348   if (ivs->bad_uses)
5349     return infinite_cost;
5350   else
5351     return ivs->cost;
5352 }
5353
5354 /* Returns true if all dependences of CP are among invariants in IVS.  */
5355
5356 static bool
5357 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5358 {
5359   unsigned i;
5360   bitmap_iterator bi;
5361
5362   if (!cp->depends_on)
5363     return true;
5364
5365   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5366     {
5367       if (ivs->n_invariant_uses[i] == 0)
5368         return false;
5369     }
5370
5371   return true;
5372 }
5373
5374 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5375    it before NEXT_CHANGE.  */
5376
5377 static struct iv_ca_delta *
5378 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5379                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5380 {
5381   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5382
5383   change->use = use;
5384   change->old_cp = old_cp;
5385   change->new_cp = new_cp;
5386   change->next_change = next_change;
5387
5388   return change;
5389 }
5390
5391 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5392    are rewritten.  */
5393
5394 static struct iv_ca_delta *
5395 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5396 {
5397   struct iv_ca_delta *last;
5398
5399   if (!l2)
5400     return l1;
5401
5402   if (!l1)
5403     return l2;
5404
5405   for (last = l1; last->next_change; last = last->next_change)
5406     continue;
5407   last->next_change = l2;
5408
5409   return l1;
5410 }
5411
5412 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5413
5414 static struct iv_ca_delta *
5415 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5416 {
5417   struct iv_ca_delta *act, *next, *prev = NULL;
5418   struct cost_pair *tmp;
5419
5420   for (act = delta; act; act = next)
5421     {
5422       next = act->next_change;
5423       act->next_change = prev;
5424       prev = act;
5425
5426       tmp = act->old_cp;
5427       act->old_cp = act->new_cp;
5428       act->new_cp = tmp;
5429     }
5430
5431   return prev;
5432 }
5433
5434 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5435    reverted instead.  */
5436
5437 static void
5438 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5439                     struct iv_ca_delta *delta, bool forward)
5440 {
5441   struct cost_pair *from, *to;
5442   struct iv_ca_delta *act;
5443
5444   if (!forward)
5445     delta = iv_ca_delta_reverse (delta);
5446
5447   for (act = delta; act; act = act->next_change)
5448     {
5449       from = act->old_cp;
5450       to = act->new_cp;
5451       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5452       iv_ca_set_cp (data, ivs, act->use, to);
5453     }
5454
5455   if (!forward)
5456     iv_ca_delta_reverse (delta);
5457 }
5458
5459 /* Returns true if CAND is used in IVS.  */
5460
5461 static bool
5462 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5463 {
5464   return ivs->n_cand_uses[cand->id] > 0;
5465 }
5466
5467 /* Returns number of induction variable candidates in the set IVS.  */
5468
5469 static unsigned
5470 iv_ca_n_cands (struct iv_ca *ivs)
5471 {
5472   return ivs->n_cands;
5473 }
5474
5475 /* Free the list of changes DELTA.  */
5476
5477 static void
5478 iv_ca_delta_free (struct iv_ca_delta **delta)
5479 {
5480   struct iv_ca_delta *act, *next;
5481
5482   for (act = *delta; act; act = next)
5483     {
5484       next = act->next_change;
5485       free (act);
5486     }
5487
5488   *delta = NULL;
5489 }
5490
5491 /* Allocates new iv candidates assignment.  */
5492
5493 static struct iv_ca *
5494 iv_ca_new (struct ivopts_data *data)
5495 {
5496   struct iv_ca *nw = XNEW (struct iv_ca);
5497
5498   nw->upto = 0;
5499   nw->bad_uses = 0;
5500   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5501   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5502   nw->cands = BITMAP_ALLOC (NULL);
5503   nw->n_cands = 0;
5504   nw->n_regs = 0;
5505   nw->cand_use_cost = no_cost;
5506   nw->cand_cost = 0;
5507   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5508   nw->cost = no_cost;
5509   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5510   nw->num_used_inv_expr = 0;
5511
5512   return nw;
5513 }
5514
5515 /* Free memory occupied by the set IVS.  */
5516
5517 static void
5518 iv_ca_free (struct iv_ca **ivs)
5519 {
5520   free ((*ivs)->cand_for_use);
5521   free ((*ivs)->n_cand_uses);
5522   BITMAP_FREE ((*ivs)->cands);
5523   free ((*ivs)->n_invariant_uses);
5524   free ((*ivs)->used_inv_expr);
5525   free (*ivs);
5526   *ivs = NULL;
5527 }
5528
5529 /* Dumps IVS to FILE.  */
5530
5531 static void
5532 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5533 {
5534   const char *pref = "  invariants ";
5535   unsigned i;
5536   comp_cost cost = iv_ca_cost (ivs);
5537
5538   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5539   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5540            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5541   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5542
5543    for (i = 0; i < ivs->upto; i++)
5544     {
5545       struct iv_use *use = iv_use (data, i);
5546       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5547       if (cp)
5548         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5549                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5550       else
5551         fprintf (file, "   use:%d --> ??\n", use->id);
5552     }
5553
5554   for (i = 1; i <= data->max_inv_id; i++)
5555     if (ivs->n_invariant_uses[i])
5556       {
5557         fprintf (file, "%s%d", pref, i);
5558         pref = ", ";
5559       }
5560   fprintf (file, "\n\n");
5561 }
5562
5563 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5564    new set, and store differences in DELTA.  Number of induction variables
5565    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5566    the function will try to find a solution with mimimal iv candidates.  */
5567
5568 static comp_cost
5569 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5570               struct iv_cand *cand, struct iv_ca_delta **delta,
5571               unsigned *n_ivs, bool min_ncand)
5572 {
5573   unsigned i;
5574   comp_cost cost;
5575   struct iv_use *use;
5576   struct cost_pair *old_cp, *new_cp;
5577
5578   *delta = NULL;
5579   for (i = 0; i < ivs->upto; i++)
5580     {
5581       use = iv_use (data, i);
5582       old_cp = iv_ca_cand_for_use (ivs, use);
5583
5584       if (old_cp
5585           && old_cp->cand == cand)
5586         continue;
5587
5588       new_cp = get_use_iv_cost (data, use, cand);
5589       if (!new_cp)
5590         continue;
5591
5592       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5593         continue;
5594
5595       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5596         continue;
5597
5598       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5599     }
5600
5601   iv_ca_delta_commit (data, ivs, *delta, true);
5602   cost = iv_ca_cost (ivs);
5603   if (n_ivs)
5604     *n_ivs = iv_ca_n_cands (ivs);
5605   iv_ca_delta_commit (data, ivs, *delta, false);
5606
5607   return cost;
5608 }
5609
5610 /* Try narrowing set IVS by removing CAND.  Return the cost of
5611    the new set and store the differences in DELTA.  */
5612
5613 static comp_cost
5614 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5615               struct iv_cand *cand, struct iv_ca_delta **delta)
5616 {
5617   unsigned i, ci;
5618   struct iv_use *use;
5619   struct cost_pair *old_cp, *new_cp, *cp;
5620   bitmap_iterator bi;
5621   struct iv_cand *cnd;
5622   comp_cost cost;
5623
5624   *delta = NULL;
5625   for (i = 0; i < n_iv_uses (data); i++)
5626     {
5627       use = iv_use (data, i);
5628
5629       old_cp = iv_ca_cand_for_use (ivs, use);
5630       if (old_cp->cand != cand)
5631         continue;
5632
5633       new_cp = NULL;
5634
5635       if (data->consider_all_candidates)
5636         {
5637           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5638             {
5639               if (ci == cand->id)
5640                 continue;
5641
5642               cnd = iv_cand (data, ci);
5643
5644               cp = get_use_iv_cost (data, use, cnd);
5645               if (!cp)
5646                 continue;
5647
5648               if (!iv_ca_has_deps (ivs, cp))
5649                 continue;
5650
5651               if (!cheaper_cost_pair (cp, new_cp))
5652                 continue;
5653
5654               new_cp = cp;
5655             }
5656         }
5657       else
5658         {
5659           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5660             {
5661               if (ci == cand->id)
5662                 continue;
5663
5664               cnd = iv_cand (data, ci);
5665
5666               cp = get_use_iv_cost (data, use, cnd);
5667               if (!cp)
5668                 continue;
5669               if (!iv_ca_has_deps (ivs, cp))
5670                 continue;
5671
5672               if (!cheaper_cost_pair (cp, new_cp))
5673                 continue;
5674
5675               new_cp = cp;
5676             }
5677         }
5678
5679       if (!new_cp)
5680         {
5681           iv_ca_delta_free (delta);
5682           return infinite_cost;
5683         }
5684
5685       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5686     }
5687
5688   iv_ca_delta_commit (data, ivs, *delta, true);
5689   cost = iv_ca_cost (ivs);
5690   iv_ca_delta_commit (data, ivs, *delta, false);
5691
5692   return cost;
5693 }
5694
5695 /* Try optimizing the set of candidates IVS by removing candidates different
5696    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5697    differences in DELTA.  */
5698
5699 static comp_cost
5700 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5701              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5702 {
5703   bitmap_iterator bi;
5704   struct iv_ca_delta *act_delta, *best_delta;
5705   unsigned i;
5706   comp_cost best_cost, acost;
5707   struct iv_cand *cand;
5708
5709   best_delta = NULL;
5710   best_cost = iv_ca_cost (ivs);
5711
5712   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5713     {
5714       cand = iv_cand (data, i);
5715
5716       if (cand == except_cand)
5717         continue;
5718
5719       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5720
5721       if (compare_costs (acost, best_cost) < 0)
5722         {
5723           best_cost = acost;
5724           iv_ca_delta_free (&best_delta);
5725           best_delta = act_delta;
5726         }
5727       else
5728         iv_ca_delta_free (&act_delta);
5729     }
5730
5731   if (!best_delta)
5732     {
5733       *delta = NULL;
5734       return best_cost;
5735     }
5736
5737   /* Recurse to possibly remove other unnecessary ivs.  */
5738   iv_ca_delta_commit (data, ivs, best_delta, true);
5739   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5740   iv_ca_delta_commit (data, ivs, best_delta, false);
5741   *delta = iv_ca_delta_join (best_delta, *delta);
5742   return best_cost;
5743 }
5744
5745 /* Tries to extend the sets IVS in the best possible way in order
5746    to express the USE.  If ORIGINALP is true, prefer candidates from
5747    the original set of IVs, otherwise favor important candidates not
5748    based on any memory object.  */
5749
5750 static bool
5751 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5752                   struct iv_use *use, bool originalp)
5753 {
5754   comp_cost best_cost, act_cost;
5755   unsigned i;
5756   bitmap_iterator bi;
5757   struct iv_cand *cand;
5758   struct iv_ca_delta *best_delta = NULL, *act_delta;
5759   struct cost_pair *cp;
5760
5761   iv_ca_add_use (data, ivs, use, false);
5762   best_cost = iv_ca_cost (ivs);
5763
5764   cp = iv_ca_cand_for_use (ivs, use);
5765   if (!cp)
5766     {
5767       ivs->upto--;
5768       ivs->bad_uses--;
5769       iv_ca_add_use (data, ivs, use, true);
5770       best_cost = iv_ca_cost (ivs);
5771       cp = iv_ca_cand_for_use (ivs, use);
5772     }
5773   if (cp)
5774     {
5775       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5776       iv_ca_set_no_cp (data, ivs, use);
5777     }
5778
5779   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5780      first try important candidates not based on any memory object.  Only if
5781      this fails, try the specific ones.  Rationale -- in loops with many
5782      variables the best choice often is to use just one generic biv.  If we
5783      added here many ivs specific to the uses, the optimization algorithm later
5784      would be likely to get stuck in a local minimum, thus causing us to create
5785      too many ivs.  The approach from few ivs to more seems more likely to be
5786      successful -- starting from few ivs, replacing an expensive use by a
5787      specific iv should always be a win.  */
5788   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5789     {
5790       cand = iv_cand (data, i);
5791
5792       if (originalp && cand->pos !=IP_ORIGINAL)
5793         continue;
5794
5795       if (!originalp && cand->iv->base_object != NULL_TREE)
5796         continue;
5797
5798       if (iv_ca_cand_used_p (ivs, cand))
5799         continue;
5800
5801       cp = get_use_iv_cost (data, use, cand);
5802       if (!cp)
5803         continue;
5804
5805       iv_ca_set_cp (data, ivs, use, cp);
5806       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5807                                true);
5808       iv_ca_set_no_cp (data, ivs, use);
5809       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5810
5811       if (compare_costs (act_cost, best_cost) < 0)
5812         {
5813           best_cost = act_cost;
5814
5815           iv_ca_delta_free (&best_delta);
5816           best_delta = act_delta;
5817         }
5818       else
5819         iv_ca_delta_free (&act_delta);
5820     }
5821
5822   if (infinite_cost_p (best_cost))
5823     {
5824       for (i = 0; i < use->n_map_members; i++)
5825         {
5826           cp = use->cost_map + i;
5827           cand = cp->cand;
5828           if (!cand)
5829             continue;
5830
5831           /* Already tried this.  */
5832           if (cand->important)
5833             {
5834               if (originalp && cand->pos == IP_ORIGINAL)
5835                 continue;
5836               if (!originalp && cand->iv->base_object == NULL_TREE)
5837                 continue;
5838             }
5839
5840           if (iv_ca_cand_used_p (ivs, cand))
5841             continue;
5842
5843           act_delta = NULL;
5844           iv_ca_set_cp (data, ivs, use, cp);
5845           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5846           iv_ca_set_no_cp (data, ivs, use);
5847           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5848                                        cp, act_delta);
5849
5850           if (compare_costs (act_cost, best_cost) < 0)
5851             {
5852               best_cost = act_cost;
5853
5854               if (best_delta)
5855                 iv_ca_delta_free (&best_delta);
5856               best_delta = act_delta;
5857             }
5858           else
5859             iv_ca_delta_free (&act_delta);
5860         }
5861     }
5862
5863   iv_ca_delta_commit (data, ivs, best_delta, true);
5864   iv_ca_delta_free (&best_delta);
5865
5866   return !infinite_cost_p (best_cost);
5867 }
5868
5869 /* Finds an initial assignment of candidates to uses.  */
5870
5871 static struct iv_ca *
5872 get_initial_solution (struct ivopts_data *data, bool originalp)
5873 {
5874   struct iv_ca *ivs = iv_ca_new (data);
5875   unsigned i;
5876
5877   for (i = 0; i < n_iv_uses (data); i++)
5878     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5879       {
5880         iv_ca_free (&ivs);
5881         return NULL;
5882       }
5883
5884   return ivs;
5885 }
5886
5887 /* Tries to improve set of induction variables IVS.  */
5888
5889 static bool
5890 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5891 {
5892   unsigned i, n_ivs;
5893   comp_cost acost, best_cost = iv_ca_cost (ivs);
5894   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5895   struct iv_cand *cand;
5896
5897   /* Try extending the set of induction variables by one.  */
5898   for (i = 0; i < n_iv_cands (data); i++)
5899     {
5900       cand = iv_cand (data, i);
5901
5902       if (iv_ca_cand_used_p (ivs, cand))
5903         continue;
5904
5905       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5906       if (!act_delta)
5907         continue;
5908
5909       /* If we successfully added the candidate and the set is small enough,
5910          try optimizing it by removing other candidates.  */
5911       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5912         {
5913           iv_ca_delta_commit (data, ivs, act_delta, true);
5914           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5915           iv_ca_delta_commit (data, ivs, act_delta, false);
5916           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5917         }
5918
5919       if (compare_costs (acost, best_cost) < 0)
5920         {
5921           best_cost = acost;
5922           iv_ca_delta_free (&best_delta);
5923           best_delta = act_delta;
5924         }
5925       else
5926         iv_ca_delta_free (&act_delta);
5927     }
5928
5929   if (!best_delta)
5930     {
5931       /* Try removing the candidates from the set instead.  */
5932       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5933
5934       /* Nothing more we can do.  */
5935       if (!best_delta)
5936         return false;
5937     }
5938
5939   iv_ca_delta_commit (data, ivs, best_delta, true);
5940   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5941   iv_ca_delta_free (&best_delta);
5942   return true;
5943 }
5944
5945 /* Attempts to find the optimal set of induction variables.  We do simple
5946    greedy heuristic -- we try to replace at most one candidate in the selected
5947    solution and remove the unused ivs while this improves the cost.  */
5948
5949 static struct iv_ca *
5950 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5951 {
5952   struct iv_ca *set;
5953
5954   /* Get the initial solution.  */
5955   set = get_initial_solution (data, originalp);
5956   if (!set)
5957     {
5958       if (dump_file && (dump_flags & TDF_DETAILS))
5959         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5960       return NULL;
5961     }
5962
5963   if (dump_file && (dump_flags & TDF_DETAILS))
5964     {
5965       fprintf (dump_file, "Initial set of candidates:\n");
5966       iv_ca_dump (data, dump_file, set);
5967     }
5968
5969   while (try_improve_iv_set (data, set))
5970     {
5971       if (dump_file && (dump_flags & TDF_DETAILS))
5972         {
5973           fprintf (dump_file, "Improved to:\n");
5974           iv_ca_dump (data, dump_file, set);
5975         }
5976     }
5977
5978   return set;
5979 }
5980
5981 static struct iv_ca *
5982 find_optimal_iv_set (struct ivopts_data *data)
5983 {
5984   unsigned i;
5985   struct iv_ca *set, *origset;
5986   struct iv_use *use;
5987   comp_cost cost, origcost;
5988
5989   /* Determine the cost based on a strategy that starts with original IVs,
5990      and try again using a strategy that prefers candidates not based
5991      on any IVs.  */
5992   origset = find_optimal_iv_set_1 (data, true);
5993   set = find_optimal_iv_set_1 (data, false);
5994
5995   if (!origset && !set)
5996     return NULL;
5997
5998   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5999   cost = set ? iv_ca_cost (set) : infinite_cost;
6000
6001   if (dump_file && (dump_flags & TDF_DETAILS))
6002     {
6003       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6004                origcost.cost, origcost.complexity);
6005       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6006                cost.cost, cost.complexity);
6007     }
6008
6009   /* Choose the one with the best cost.  */
6010   if (compare_costs (origcost, cost) <= 0)
6011     {
6012       if (set)
6013         iv_ca_free (&set);
6014       set = origset;
6015     }
6016   else if (origset)
6017     iv_ca_free (&origset);
6018
6019   for (i = 0; i < n_iv_uses (data); i++)
6020     {
6021       use = iv_use (data, i);
6022       use->selected = iv_ca_cand_for_use (set, use)->cand;
6023     }
6024
6025   return set;
6026 }
6027
6028 /* Creates a new induction variable corresponding to CAND.  */
6029
6030 static void
6031 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6032 {
6033   gimple_stmt_iterator incr_pos;
6034   tree base;
6035   bool after = false;
6036
6037   if (!cand->iv)
6038     return;
6039
6040   switch (cand->pos)
6041     {
6042     case IP_NORMAL:
6043       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6044       break;
6045
6046     case IP_END:
6047       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6048       after = true;
6049       break;
6050
6051     case IP_AFTER_USE:
6052       after = true;
6053       /* fall through */
6054     case IP_BEFORE_USE:
6055       incr_pos = gsi_for_stmt (cand->incremented_at);
6056       break;
6057
6058     case IP_ORIGINAL:
6059       /* Mark that the iv is preserved.  */
6060       name_info (data, cand->var_before)->preserve_biv = true;
6061       name_info (data, cand->var_after)->preserve_biv = true;
6062
6063       /* Rewrite the increment so that it uses var_before directly.  */
6064       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6065       return;
6066     }
6067
6068   gimple_add_tmp_var (cand->var_before);
6069
6070   base = unshare_expr (cand->iv->base);
6071
6072   create_iv (base, unshare_expr (cand->iv->step),
6073              cand->var_before, data->current_loop,
6074              &incr_pos, after, &cand->var_before, &cand->var_after);
6075 }
6076
6077 /* Creates new induction variables described in SET.  */
6078
6079 static void
6080 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6081 {
6082   unsigned i;
6083   struct iv_cand *cand;
6084   bitmap_iterator bi;
6085
6086   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6087     {
6088       cand = iv_cand (data, i);
6089       create_new_iv (data, cand);
6090     }
6091
6092   if (dump_file && (dump_flags & TDF_DETAILS))
6093     {
6094       fprintf (dump_file, "\nSelected IV set: \n");
6095       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6096         {
6097           cand = iv_cand (data, i);
6098           dump_cand (dump_file, cand);
6099         }
6100       fprintf (dump_file, "\n");
6101     }
6102 }
6103
6104 /* Rewrites USE (definition of iv used in a nonlinear expression)
6105    using candidate CAND.  */
6106
6107 static void
6108 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6109                             struct iv_use *use, struct iv_cand *cand)
6110 {
6111   tree comp;
6112   tree op, tgt;
6113   gimple ass;
6114   gimple_stmt_iterator bsi;
6115
6116   /* An important special case -- if we are asked to express value of
6117      the original iv by itself, just exit; there is no need to
6118      introduce a new computation (that might also need casting the
6119      variable to unsigned and back).  */
6120   if (cand->pos == IP_ORIGINAL
6121       && cand->incremented_at == use->stmt)
6122     {
6123       enum tree_code stmt_code;
6124
6125       gcc_assert (is_gimple_assign (use->stmt));
6126       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6127
6128       /* Check whether we may leave the computation unchanged.
6129          This is the case only if it does not rely on other
6130          computations in the loop -- otherwise, the computation
6131          we rely upon may be removed in remove_unused_ivs,
6132          thus leading to ICE.  */
6133       stmt_code = gimple_assign_rhs_code (use->stmt);
6134       if (stmt_code == PLUS_EXPR
6135           || stmt_code == MINUS_EXPR
6136           || stmt_code == POINTER_PLUS_EXPR)
6137         {
6138           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6139             op = gimple_assign_rhs2 (use->stmt);
6140           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6141             op = gimple_assign_rhs1 (use->stmt);
6142           else
6143             op = NULL_TREE;
6144         }
6145       else
6146         op = NULL_TREE;
6147
6148       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6149         return;
6150     }
6151
6152   comp = get_computation (data->current_loop, use, cand);
6153   gcc_assert (comp != NULL_TREE);
6154
6155   switch (gimple_code (use->stmt))
6156     {
6157     case GIMPLE_PHI:
6158       tgt = PHI_RESULT (use->stmt);
6159
6160       /* If we should keep the biv, do not replace it.  */
6161       if (name_info (data, tgt)->preserve_biv)
6162         return;
6163
6164       bsi = gsi_after_labels (gimple_bb (use->stmt));
6165       break;
6166
6167     case GIMPLE_ASSIGN:
6168       tgt = gimple_assign_lhs (use->stmt);
6169       bsi = gsi_for_stmt (use->stmt);
6170       break;
6171
6172     default:
6173       gcc_unreachable ();
6174     }
6175
6176   if (!valid_gimple_rhs_p (comp)
6177       || (gimple_code (use->stmt) != GIMPLE_PHI
6178           /* We can't allow re-allocating the stmt as it might be pointed
6179              to still.  */
6180           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6181               >= gimple_num_ops (gsi_stmt (bsi)))))
6182     {
6183       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6184                                        true, GSI_SAME_STMT);
6185       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6186         {
6187           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6188           /* As this isn't a plain copy we have to reset alignment
6189              information.  */
6190           if (SSA_NAME_PTR_INFO (comp))
6191             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6192         }
6193     }
6194
6195   if (gimple_code (use->stmt) == GIMPLE_PHI)
6196     {
6197       ass = gimple_build_assign (tgt, comp);
6198       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6199
6200       bsi = gsi_for_stmt (use->stmt);
6201       remove_phi_node (&bsi, false);
6202     }
6203   else
6204     {
6205       gimple_assign_set_rhs_from_tree (&bsi, comp);
6206       use->stmt = gsi_stmt (bsi);
6207     }
6208 }
6209
6210 /* Performs a peephole optimization to reorder the iv update statement with
6211    a mem ref to enable instruction combining in later phases. The mem ref uses
6212    the iv value before the update, so the reordering transformation requires
6213    adjustment of the offset. CAND is the selected IV_CAND.
6214
6215    Example:
6216
6217    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6218    iv2 = iv1 + 1;
6219
6220    if (t < val)      (1)
6221      goto L;
6222    goto Head;
6223
6224
6225    directly propagating t over to (1) will introduce overlapping live range
6226    thus increase register pressure. This peephole transform it into:
6227
6228
6229    iv2 = iv1 + 1;
6230    t = MEM_REF (base, iv2, 8, 8);
6231    if (t < val)
6232      goto L;
6233    goto Head;
6234 */
6235
6236 static void
6237 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6238 {
6239   tree var_after;
6240   gimple iv_update, stmt;
6241   basic_block bb;
6242   gimple_stmt_iterator gsi, gsi_iv;
6243
6244   if (cand->pos != IP_NORMAL)
6245     return;
6246
6247   var_after = cand->var_after;
6248   iv_update = SSA_NAME_DEF_STMT (var_after);
6249
6250   bb = gimple_bb (iv_update);
6251   gsi = gsi_last_nondebug_bb (bb);
6252   stmt = gsi_stmt (gsi);
6253
6254   /* Only handle conditional statement for now.  */
6255   if (gimple_code (stmt) != GIMPLE_COND)
6256     return;
6257
6258   gsi_prev_nondebug (&gsi);
6259   stmt = gsi_stmt (gsi);
6260   if (stmt != iv_update)
6261     return;
6262
6263   gsi_prev_nondebug (&gsi);
6264   if (gsi_end_p (gsi))
6265     return;
6266
6267   stmt = gsi_stmt (gsi);
6268   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6269     return;
6270
6271   if (stmt != use->stmt)
6272     return;
6273
6274   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6275     return;
6276
6277   if (dump_file && (dump_flags & TDF_DETAILS))
6278     {
6279       fprintf (dump_file, "Reordering \n");
6280       print_gimple_stmt (dump_file, iv_update, 0, 0);
6281       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6282       fprintf (dump_file, "\n");
6283     }
6284
6285   gsi = gsi_for_stmt (use->stmt);
6286   gsi_iv = gsi_for_stmt (iv_update);
6287   gsi_move_before (&gsi_iv, &gsi);
6288
6289   cand->pos = IP_BEFORE_USE;
6290   cand->incremented_at = use->stmt;
6291 }
6292
6293 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6294
6295 static void
6296 rewrite_use_address (struct ivopts_data *data,
6297                      struct iv_use *use, struct iv_cand *cand)
6298 {
6299   aff_tree aff;
6300   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6301   tree base_hint = NULL_TREE;
6302   tree ref, iv;
6303   bool ok;
6304
6305   adjust_iv_update_pos (cand, use);
6306   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6307   gcc_assert (ok);
6308   unshare_aff_combination (&aff);
6309
6310   /* To avoid undefined overflow problems, all IV candidates use unsigned
6311      integer types.  The drawback is that this makes it impossible for
6312      create_mem_ref to distinguish an IV that is based on a memory object
6313      from one that represents simply an offset.
6314
6315      To work around this problem, we pass a hint to create_mem_ref that
6316      indicates which variable (if any) in aff is an IV based on a memory
6317      object.  Note that we only consider the candidate.  If this is not
6318      based on an object, the base of the reference is in some subexpression
6319      of the use -- but these will use pointer types, so they are recognized
6320      by the create_mem_ref heuristics anyway.  */
6321   if (cand->iv->base_object)
6322     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6323
6324   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6325   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6326                         reference_alias_ptr_type (*use->op_p),
6327                         iv, base_hint, data->speed);
6328   copy_ref_info (ref, *use->op_p);
6329   *use->op_p = ref;
6330 }
6331
6332 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6333    candidate CAND.  */
6334
6335 static void
6336 rewrite_use_compare (struct ivopts_data *data,
6337                      struct iv_use *use, struct iv_cand *cand)
6338 {
6339   tree comp, *var_p, op, bound;
6340   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6341   enum tree_code compare;
6342   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6343   bool ok;
6344
6345   bound = cp->value;
6346   if (bound)
6347     {
6348       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6349       tree var_type = TREE_TYPE (var);
6350       gimple_seq stmts;
6351
6352       if (dump_file && (dump_flags & TDF_DETAILS))
6353         {
6354           fprintf (dump_file, "Replacing exit test: ");
6355           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6356         }
6357       compare = cp->comp;
6358       bound = unshare_expr (fold_convert (var_type, bound));
6359       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6360       if (stmts)
6361         gsi_insert_seq_on_edge_immediate (
6362                 loop_preheader_edge (data->current_loop),
6363                 stmts);
6364
6365       gimple_cond_set_lhs (use->stmt, var);
6366       gimple_cond_set_code (use->stmt, compare);
6367       gimple_cond_set_rhs (use->stmt, op);
6368       return;
6369     }
6370
6371   /* The induction variable elimination failed; just express the original
6372      giv.  */
6373   comp = get_computation (data->current_loop, use, cand);
6374   gcc_assert (comp != NULL_TREE);
6375
6376   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6377   gcc_assert (ok);
6378
6379   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6380                                      true, GSI_SAME_STMT);
6381 }
6382
6383 /* Rewrites USE using candidate CAND.  */
6384
6385 static void
6386 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6387 {
6388   switch (use->type)
6389     {
6390       case USE_NONLINEAR_EXPR:
6391         rewrite_use_nonlinear_expr (data, use, cand);
6392         break;
6393
6394       case USE_ADDRESS:
6395         rewrite_use_address (data, use, cand);
6396         break;
6397
6398       case USE_COMPARE:
6399         rewrite_use_compare (data, use, cand);
6400         break;
6401
6402       default:
6403         gcc_unreachable ();
6404     }
6405
6406   update_stmt (use->stmt);
6407 }
6408
6409 /* Rewrite the uses using the selected induction variables.  */
6410
6411 static void
6412 rewrite_uses (struct ivopts_data *data)
6413 {
6414   unsigned i;
6415   struct iv_cand *cand;
6416   struct iv_use *use;
6417
6418   for (i = 0; i < n_iv_uses (data); i++)
6419     {
6420       use = iv_use (data, i);
6421       cand = use->selected;
6422       gcc_assert (cand);
6423
6424       rewrite_use (data, use, cand);
6425     }
6426 }
6427
6428 /* Removes the ivs that are not used after rewriting.  */
6429
6430 static void
6431 remove_unused_ivs (struct ivopts_data *data)
6432 {
6433   unsigned j;
6434   bitmap_iterator bi;
6435   bitmap toremove = BITMAP_ALLOC (NULL);
6436
6437   /* Figure out an order in which to release SSA DEFs so that we don't
6438      release something that we'd have to propagate into a debug stmt
6439      afterwards.  */
6440   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6441     {
6442       struct version_info *info;
6443
6444       info = ver_info (data, j);
6445       if (info->iv
6446           && !integer_zerop (info->iv->step)
6447           && !info->inv_id
6448           && !info->iv->have_use_for
6449           && !info->preserve_biv)
6450         {
6451           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6452
6453           tree def = info->iv->ssa_name;
6454
6455           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6456             {
6457               imm_use_iterator imm_iter;
6458               use_operand_p use_p;
6459               gimple stmt;
6460               int count = 0;
6461
6462               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6463                 {
6464                   if (!gimple_debug_bind_p (stmt))
6465                     continue;
6466
6467                   /* We just want to determine whether to do nothing
6468                      (count == 0), to substitute the computed
6469                      expression into a single use of the SSA DEF by
6470                      itself (count == 1), or to use a debug temp
6471                      because the SSA DEF is used multiple times or as
6472                      part of a larger expression (count > 1). */
6473                   count++;
6474                   if (gimple_debug_bind_get_value (stmt) != def)
6475                     count++;
6476
6477                   if (count > 1)
6478                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6479                 }
6480
6481               if (!count)
6482                 continue;
6483
6484               struct iv_use dummy_use;
6485               struct iv_cand *best_cand = NULL, *cand;
6486               unsigned i, best_pref = 0, cand_pref;
6487
6488               memset (&dummy_use, 0, sizeof (dummy_use));
6489               dummy_use.iv = info->iv;
6490               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6491                 {
6492                   cand = iv_use (data, i)->selected;
6493                   if (cand == best_cand)
6494                     continue;
6495                   cand_pref = operand_equal_p (cand->iv->step,
6496                                                info->iv->step, 0)
6497                     ? 4 : 0;
6498                   cand_pref
6499                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6500                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6501                     ? 2 : 0;
6502                   cand_pref
6503                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6504                     ? 1 : 0;
6505                   if (best_cand == NULL || best_pref < cand_pref)
6506                     {
6507                       best_cand = cand;
6508                       best_pref = cand_pref;
6509                     }
6510                 }
6511
6512               if (!best_cand)
6513                 continue;
6514
6515               tree comp = get_computation_at (data->current_loop,
6516                                               &dummy_use, best_cand,
6517                                               SSA_NAME_DEF_STMT (def));
6518               if (!comp)
6519                 continue;
6520
6521               if (count > 1)
6522                 {
6523                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6524                   DECL_ARTIFICIAL (vexpr) = 1;
6525                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6526                   if (SSA_NAME_VAR (def))
6527                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6528                   else
6529                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6530                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6531                   gimple_stmt_iterator gsi;
6532
6533                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6534                     gsi = gsi_after_labels (gimple_bb
6535                                             (SSA_NAME_DEF_STMT (def)));
6536                   else
6537                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6538
6539                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6540                   comp = vexpr;
6541                 }
6542
6543               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6544                 {
6545                   if (!gimple_debug_bind_p (stmt))
6546                     continue;
6547
6548                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6549                     SET_USE (use_p, comp);
6550
6551                   update_stmt (stmt);
6552                 }
6553             }
6554         }
6555     }
6556
6557   release_defs_bitset (toremove);
6558
6559   BITMAP_FREE (toremove);
6560 }
6561
6562 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6563    for pointer_map_traverse.  */
6564
6565 static bool
6566 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6567                       void *data ATTRIBUTE_UNUSED)
6568 {
6569   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6570
6571   free (niter);
6572   return true;
6573 }
6574
6575 /* Frees data allocated by the optimization of a single loop.  */
6576
6577 static void
6578 free_loop_data (struct ivopts_data *data)
6579 {
6580   unsigned i, j;
6581   bitmap_iterator bi;
6582   tree obj;
6583
6584   if (data->niters)
6585     {
6586       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6587       pointer_map_destroy (data->niters);
6588       data->niters = NULL;
6589     }
6590
6591   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6592     {
6593       struct version_info *info;
6594
6595       info = ver_info (data, i);
6596       free (info->iv);
6597       info->iv = NULL;
6598       info->has_nonlin_use = false;
6599       info->preserve_biv = false;
6600       info->inv_id = 0;
6601     }
6602   bitmap_clear (data->relevant);
6603   bitmap_clear (data->important_candidates);
6604
6605   for (i = 0; i < n_iv_uses (data); i++)
6606     {
6607       struct iv_use *use = iv_use (data, i);
6608
6609       free (use->iv);
6610       BITMAP_FREE (use->related_cands);
6611       for (j = 0; j < use->n_map_members; j++)
6612         if (use->cost_map[j].depends_on)
6613           BITMAP_FREE (use->cost_map[j].depends_on);
6614       free (use->cost_map);
6615       free (use);
6616     }
6617   data->iv_uses.truncate (0);
6618
6619   for (i = 0; i < n_iv_cands (data); i++)
6620     {
6621       struct iv_cand *cand = iv_cand (data, i);
6622
6623       free (cand->iv);
6624       if (cand->depends_on)
6625         BITMAP_FREE (cand->depends_on);
6626       free (cand);
6627     }
6628   data->iv_candidates.truncate (0);
6629
6630   if (data->version_info_size < num_ssa_names)
6631     {
6632       data->version_info_size = 2 * num_ssa_names;
6633       free (data->version_info);
6634       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6635     }
6636
6637   data->max_inv_id = 0;
6638
6639   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6640     SET_DECL_RTL (obj, NULL_RTX);
6641
6642   decl_rtl_to_reset.truncate (0);
6643
6644   data->inv_expr_tab.empty ();
6645   data->inv_expr_id = 0;
6646 }
6647
6648 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6649    loop tree.  */
6650
6651 static void
6652 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6653 {
6654   free_loop_data (data);
6655   free (data->version_info);
6656   BITMAP_FREE (data->relevant);
6657   BITMAP_FREE (data->important_candidates);
6658
6659   decl_rtl_to_reset.release ();
6660   data->iv_uses.release ();
6661   data->iv_candidates.release ();
6662   data->inv_expr_tab.dispose ();
6663 }
6664
6665 /* Returns true if the loop body BODY includes any function calls.  */
6666
6667 static bool
6668 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6669 {
6670   gimple_stmt_iterator gsi;
6671   unsigned i;
6672
6673   for (i = 0; i < num_nodes; i++)
6674     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6675       {
6676         gimple stmt = gsi_stmt (gsi);
6677         if (is_gimple_call (stmt)
6678             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6679           return true;
6680       }
6681   return false;
6682 }
6683
6684 /* Optimizes the LOOP.  Returns true if anything changed.  */
6685
6686 static bool
6687 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6688 {
6689   bool changed = false;
6690   struct iv_ca *iv_ca;
6691   edge exit = single_dom_exit (loop);
6692   basic_block *body;
6693
6694   gcc_assert (!data->niters);
6695   data->current_loop = loop;
6696   data->speed = optimize_loop_for_speed_p (loop);
6697
6698   if (dump_file && (dump_flags & TDF_DETAILS))
6699     {
6700       fprintf (dump_file, "Processing loop %d\n", loop->num);
6701
6702       if (exit)
6703         {
6704           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6705                    exit->src->index, exit->dest->index);
6706           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6707           fprintf (dump_file, "\n");
6708         }
6709
6710       fprintf (dump_file, "\n");
6711     }
6712
6713   body = get_loop_body (loop);
6714   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6715   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6716   free (body);
6717
6718   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6719
6720   /* For each ssa name determines whether it behaves as an induction variable
6721      in some loop.  */
6722   if (!find_induction_variables (data))
6723     goto finish;
6724
6725   /* Finds interesting uses (item 1).  */
6726   find_interesting_uses (data);
6727   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6728     goto finish;
6729
6730   /* Finds candidates for the induction variables (item 2).  */
6731   find_iv_candidates (data);
6732
6733   /* Calculates the costs (item 3, part 1).  */
6734   determine_iv_costs (data);
6735   determine_use_iv_costs (data);
6736   determine_set_costs (data);
6737
6738   /* Find the optimal set of induction variables (item 3, part 2).  */
6739   iv_ca = find_optimal_iv_set (data);
6740   if (!iv_ca)
6741     goto finish;
6742   changed = true;
6743
6744   /* Create the new induction variables (item 4, part 1).  */
6745   create_new_ivs (data, iv_ca);
6746   iv_ca_free (&iv_ca);
6747
6748   /* Rewrite the uses (item 4, part 2).  */
6749   rewrite_uses (data);
6750
6751   /* Remove the ivs that are unused after rewriting.  */
6752   remove_unused_ivs (data);
6753
6754   /* We have changed the structure of induction variables; it might happen
6755      that definitions in the scev database refer to some of them that were
6756      eliminated.  */
6757   scev_reset ();
6758
6759 finish:
6760   free_loop_data (data);
6761
6762   return changed;
6763 }
6764
6765 /* Main entry point.  Optimizes induction variables in loops.  */
6766
6767 void
6768 tree_ssa_iv_optimize (void)
6769 {
6770   struct loop *loop;
6771   struct ivopts_data data;
6772   loop_iterator li;
6773
6774   tree_ssa_iv_optimize_init (&data);
6775
6776   /* Optimize the loops starting with the innermost ones.  */
6777   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6778     {
6779       if (dump_file && (dump_flags & TDF_DETAILS))
6780         flow_loop_dump (loop, dump_file, NULL, 1);
6781
6782       tree_ssa_iv_optimize_loop (&data, loop);
6783     }
6784
6785   tree_ssa_iv_optimize_finalize (&data);
6786 }