gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "tm_p.h"
  70 #include "basic-block.h"
  71 #include "gimple-pretty-print.h"
  72 #include "gimple.h"
  73 #include "gimple-ssa.h"
  74 #include "cgraph.h"
  75 #include "tree-cfg.h"
  76 #include "tree-phinodes.h"
  77 #include "ssa-iterators.h"
  78 #include "tree-ssanames.h"
  79 #include "tree-ssa-loop-ivopts.h"
  80 #include "tree-ssa-loop-manip.h"
  81 #include "tree-ssa-loop-niter.h"
  82 #include "tree-ssa-loop.h"
  83 #include "tree-dfa.h"
  84 #include "tree-ssa.h"
  85 #include "cfgloop.h"
  86 #include "tree-pass.h"
  87 #include "ggc.h"
  88 #include "insn-config.h"
  89 #include "pointer-set.h"
  90 #include "hash-table.h"
  91 #include "tree-chrec.h"
  92 #include "tree-scalar-evolution.h"
  93 #include "cfgloop.h"
  94 #include "params.h"
  95 #include "langhooks.h"
  96 #include "tree-affine.h"
  97 #include "target.h"
  98 #include "tree-inline.h"
  99 #include "tree-ssa-propagate.h"
 100 #include "expmed.h"
 101 #include "tree-ssa-address.h"
 102
 103 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 104    cost of different addressing modes.  This should be moved to a TBD
 105    interface between the GIMPLE and RTL worlds.  */
 106 #include "expr.h"
 107 #include "recog.h"
 108
 109 /* The infinite cost.  */
 110 #define INFTY 10000000
 111
 112 #define AVG_LOOP_NITER(LOOP) 5
 113
 114 /* Returns the expected number of loop iterations for LOOP.
 115    The average trip count is computed from profile data if it
 116    exists. */
 117
 118 static inline HOST_WIDE_INT
 119 avg_loop_niter (struct loop *loop)
 120 {
 121   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 122   if (niter == -1)
 123     return AVG_LOOP_NITER (loop);
 124
 125   return niter;
 126 }
 127
 128 /* Representation of the induction variable.  */
 129 struct iv
 130 {
 131   tree base;            /* Initial value of the iv.  */
 132   tree base_object;     /* A memory object to that the induction variable points.  */
 133   tree step;            /* Step of the iv (constant only).  */
 134   tree ssa_name;        /* The ssa name with the value.  */
 135   bool biv_p;           /* Is it a biv?  */
 136   bool have_use_for;    /* Do we already have a use for it?  */
 137   unsigned use_id;      /* The identifier in the use if it is the case.  */
 138 };
 139
 140 /* Per-ssa version information (induction variable descriptions, etc.).  */
 141 struct version_info
 142 {
 143   tree name;            /* The ssa name.  */
 144   struct iv *iv;        /* Induction variable description.  */
 145   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 146                            an expression that is not an induction variable.  */
 147   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 148   unsigned inv_id;      /* Id of an invariant.  */
 149 };
 150
 151 /* Types of uses.  */
 152 enum use_type
 153 {
 154   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 155   USE_ADDRESS,          /* Use in an address.  */
 156   USE_COMPARE           /* Use is a compare.  */
 157 };
 158
 159 /* Cost of a computation.  */
 160 typedef struct
 161 {
 162   int cost;             /* The runtime cost.  */
 163   unsigned complexity;  /* The estimate of the complexity of the code for
 164                            the computation (in no concrete units --
 165                            complexity field should be larger for more
 166                            complex expressions and addressing modes).  */
 167 } comp_cost;
 168
 169 static const comp_cost no_cost = {0, 0};
 170 static const comp_cost infinite_cost = {INFTY, INFTY};
 171
 172 /* The candidate - cost pair.  */
 173 struct cost_pair
 174 {
 175   struct iv_cand *cand; /* The candidate.  */
 176   comp_cost cost;       /* The cost.  */
 177   bitmap depends_on;    /* The list of invariants that have to be
 178                            preserved.  */
 179   tree value;           /* For final value elimination, the expression for
 180                            the final value of the iv.  For iv elimination,
 181                            the new bound to compare with.  */
 182   enum tree_code comp;  /* For iv elimination, the comparison.  */
 183   int inv_expr_id;      /* Loop invariant expression id.  */
 184 };
 185
 186 /* Use.  */
 187 struct iv_use
 188 {
 189   unsigned id;          /* The id of the use.  */
 190   enum use_type type;   /* Type of the use.  */
 191   struct iv *iv;        /* The induction variable it is based on.  */
 192   gimple stmt;          /* Statement in that it occurs.  */
 193   tree *op_p;           /* The place where it occurs.  */
 194   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 195                            important ones.  */
 196
 197   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 198   struct cost_pair *cost_map;
 199                         /* The costs wrto the iv candidates.  */
 200
 201   struct iv_cand *selected;
 202                         /* The selected candidate.  */
 203 };
 204
 205 /* The position where the iv is computed.  */
 206 enum iv_position
 207 {
 208   IP_NORMAL,            /* At the end, just before the exit condition.  */
 209   IP_END,               /* At the end of the latch block.  */
 210   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 211   IP_AFTER_USE,         /* Immediately after a specific use.  */
 212   IP_ORIGINAL           /* The original biv.  */
 213 };
 214
 215 /* The induction variable candidate.  */
 216 struct iv_cand
 217 {
 218   unsigned id;          /* The number of the candidate.  */
 219   bool important;       /* Whether this is an "important" candidate, i.e. such
 220                            that it should be considered by all uses.  */
 221   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 222   gimple incremented_at;/* For original biv, the statement where it is
 223                            incremented.  */
 224   tree var_before;      /* The variable used for it before increment.  */
 225   tree var_after;       /* The variable used for it after increment.  */
 226   struct iv *iv;        /* The value of the candidate.  NULL for
 227                            "pseudocandidate" used to indicate the possibility
 228                            to replace the final value of an iv by direct
 229                            computation of the value.  */
 230   unsigned cost;        /* Cost of the candidate.  */
 231   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 232   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 233                               where it is incremented.  */
 234   bitmap depends_on;    /* The list of invariants that are used in step of the
 235                            biv.  */
 236 };
 237
 238 /* Loop invariant expression hashtable entry.  */
 239 struct iv_inv_expr_ent
 240 {
 241   tree expr;
 242   int id;
 243   hashval_t hash;
 244 };
 245
 246 /* The data used by the induction variable optimizations.  */
 247
 248 typedef struct iv_use *iv_use_p;
 249
 250 typedef struct iv_cand *iv_cand_p;
 251
 252 /* Hashtable helpers.  */
 253
 254 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 255 {
 256   typedef iv_inv_expr_ent value_type;
 257   typedef iv_inv_expr_ent compare_type;
 258   static inline hashval_t hash (const value_type *);
 259   static inline bool equal (const value_type *, const compare_type *);
 260 };
 261
 262 /* Hash function for loop invariant expressions.  */
 263
 264 inline hashval_t
 265 iv_inv_expr_hasher::hash (const value_type *expr)
 266 {
 267   return expr->hash;
 268 }
 269
 270 /* Hash table equality function for expressions.  */
 271
 272 inline bool
 273 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 274 {
 275   return expr1->hash == expr2->hash
 276          && operand_equal_p (expr1->expr, expr2->expr, 0);
 277 }
 278
 279 struct ivopts_data
 280 {
 281   /* The currently optimized loop.  */
 282   struct loop *current_loop;
 283
 284   /* Numbers of iterations for all exits of the current loop.  */
 285   struct pointer_map_t *niters;
 286
 287   /* Number of registers used in it.  */
 288   unsigned regs_used;
 289
 290   /* The size of version_info array allocated.  */
 291   unsigned version_info_size;
 292
 293   /* The array of information for the ssa names.  */
 294   struct version_info *version_info;
 295
 296   /* The hashtable of loop invariant expressions created
 297      by ivopt.  */
 298   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 299
 300   /* Loop invariant expression id.  */
 301   int inv_expr_id;
 302
 303   /* The bitmap of indices in version_info whose value was changed.  */
 304   bitmap relevant;
 305
 306   /* The uses of induction variables.  */
 307   vec<iv_use_p> iv_uses;
 308
 309   /* The candidates.  */
 310   vec<iv_cand_p> iv_candidates;
 311
 312   /* A bitmap of important candidates.  */
 313   bitmap important_candidates;
 314
 315   /* The maximum invariant id.  */
 316   unsigned max_inv_id;
 317
 318   /* Whether to consider just related and important candidates when replacing a
 319      use.  */
 320   bool consider_all_candidates;
 321
 322   /* Are we optimizing for speed?  */
 323   bool speed;
 324
 325   /* Whether the loop body includes any function calls.  */
 326   bool body_includes_call;
 327
 328   /* Whether the loop body can only be exited via single exit.  */
 329   bool loop_single_exit_p;
 330 };
 331
 332 /* An assignment of iv candidates to uses.  */
 333
 334 struct iv_ca
 335 {
 336   /* The number of uses covered by the assignment.  */
 337   unsigned upto;
 338
 339   /* Number of uses that cannot be expressed by the candidates in the set.  */
 340   unsigned bad_uses;
 341
 342   /* Candidate assigned to a use, together with the related costs.  */
 343   struct cost_pair **cand_for_use;
 344
 345   /* Number of times each candidate is used.  */
 346   unsigned *n_cand_uses;
 347
 348   /* The candidates used.  */
 349   bitmap cands;
 350
 351   /* The number of candidates in the set.  */
 352   unsigned n_cands;
 353
 354   /* Total number of registers needed.  */
 355   unsigned n_regs;
 356
 357   /* Total cost of expressing uses.  */
 358   comp_cost cand_use_cost;
 359
 360   /* Total cost of candidates.  */
 361   unsigned cand_cost;
 362
 363   /* Number of times each invariant is used.  */
 364   unsigned *n_invariant_uses;
 365
 366   /* The array holding the number of uses of each loop
 367      invariant expressions created by ivopt.  */
 368   unsigned *used_inv_expr;
 369
 370   /* The number of created loop invariants.  */
 371   unsigned num_used_inv_expr;
 372
 373   /* Total cost of the assignment.  */
 374   comp_cost cost;
 375 };
 376
 377 /* Difference of two iv candidate assignments.  */
 378
 379 struct iv_ca_delta
 380 {
 381   /* Changed use.  */
 382   struct iv_use *use;
 383
 384   /* An old assignment (for rollback purposes).  */
 385   struct cost_pair *old_cp;
 386
 387   /* A new assignment.  */
 388   struct cost_pair *new_cp;
 389
 390   /* Next change in the list.  */
 391   struct iv_ca_delta *next_change;
 392 };
 393
 394 /* Bound on number of candidates below that all candidates are considered.  */
 395
 396 #define CONSIDER_ALL_CANDIDATES_BOUND \
 397   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 398
 399 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 400    optimizing such a loop would help, and it would take ages).  */
 401
 402 #define MAX_CONSIDERED_USES \
 403   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 404
 405 /* If there are at most this number of ivs in the set, try removing unnecessary
 406    ivs from the set always.  */
 407
 408 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 409   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 410
 411 /* The list of trees for that the decl_rtl field must be reset is stored
 412    here.  */
 413
 414 static vec<tree> decl_rtl_to_reset;
 415
 416 static comp_cost force_expr_to_var_cost (tree, bool);
 417
 418 /* Number of uses recorded in DATA.  */
 419
 420 static inline unsigned
 421 n_iv_uses (struct ivopts_data *data)
 422 {
 423   return data->iv_uses.length ();
 424 }
 425
 426 /* Ith use recorded in DATA.  */
 427
 428 static inline struct iv_use *
 429 iv_use (struct ivopts_data *data, unsigned i)
 430 {
 431   return data->iv_uses[i];
 432 }
 433
 434 /* Number of candidates recorded in DATA.  */
 435
 436 static inline unsigned
 437 n_iv_cands (struct ivopts_data *data)
 438 {
 439   return data->iv_candidates.length ();
 440 }
 441
 442 /* Ith candidate recorded in DATA.  */
 443
 444 static inline struct iv_cand *
 445 iv_cand (struct ivopts_data *data, unsigned i)
 446 {
 447   return data->iv_candidates[i];
 448 }
 449
 450 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 451
 452 edge
 453 single_dom_exit (struct loop *loop)
 454 {
 455   edge exit = single_exit (loop);
 456
 457   if (!exit)
 458     return NULL;
 459
 460   if (!just_once_each_iteration_p (loop, exit->src))
 461     return NULL;
 462
 463   return exit;
 464 }
 465
 466 /* Dumps information about the induction variable IV to FILE.  */
 467
 468 void
 469 dump_iv (FILE *file, struct iv *iv)
 470 {
 471   if (iv->ssa_name)
 472     {
 473       fprintf (file, "ssa name ");
 474       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 475       fprintf (file, "\n");
 476     }
 477
 478   fprintf (file, "  type ");
 479   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 480   fprintf (file, "\n");
 481
 482   if (iv->step)
 483     {
 484       fprintf (file, "  base ");
 485       print_generic_expr (file, iv->base, TDF_SLIM);
 486       fprintf (file, "\n");
 487
 488       fprintf (file, "  step ");
 489       print_generic_expr (file, iv->step, TDF_SLIM);
 490       fprintf (file, "\n");
 491     }
 492   else
 493     {
 494       fprintf (file, "  invariant ");
 495       print_generic_expr (file, iv->base, TDF_SLIM);
 496       fprintf (file, "\n");
 497     }
 498
 499   if (iv->base_object)
 500     {
 501       fprintf (file, "  base object ");
 502       print_generic_expr (file, iv->base_object, TDF_SLIM);
 503       fprintf (file, "\n");
 504     }
 505
 506   if (iv->biv_p)
 507     fprintf (file, "  is a biv\n");
 508 }
 509
 510 /* Dumps information about the USE to FILE.  */
 511
 512 void
 513 dump_use (FILE *file, struct iv_use *use)
 514 {
 515   fprintf (file, "use %d\n", use->id);
 516
 517   switch (use->type)
 518     {
 519     case USE_NONLINEAR_EXPR:
 520       fprintf (file, "  generic\n");
 521       break;
 522
 523     case USE_ADDRESS:
 524       fprintf (file, "  address\n");
 525       break;
 526
 527     case USE_COMPARE:
 528       fprintf (file, "  compare\n");
 529       break;
 530
 531     default:
 532       gcc_unreachable ();
 533     }
 534
 535   fprintf (file, "  in statement ");
 536   print_gimple_stmt (file, use->stmt, 0, 0);
 537   fprintf (file, "\n");
 538
 539   fprintf (file, "  at position ");
 540   if (use->op_p)
 541     print_generic_expr (file, *use->op_p, TDF_SLIM);
 542   fprintf (file, "\n");
 543
 544   dump_iv (file, use->iv);
 545
 546   if (use->related_cands)
 547     {
 548       fprintf (file, "  related candidates ");
 549       dump_bitmap (file, use->related_cands);
 550     }
 551 }
 552
 553 /* Dumps information about the uses to FILE.  */
 554
 555 void
 556 dump_uses (FILE *file, struct ivopts_data *data)
 557 {
 558   unsigned i;
 559   struct iv_use *use;
 560
 561   for (i = 0; i < n_iv_uses (data); i++)
 562     {
 563       use = iv_use (data, i);
 564
 565       dump_use (file, use);
 566       fprintf (file, "\n");
 567     }
 568 }
 569
 570 /* Dumps information about induction variable candidate CAND to FILE.  */
 571
 572 void
 573 dump_cand (FILE *file, struct iv_cand *cand)
 574 {
 575   struct iv *iv = cand->iv;
 576
 577   fprintf (file, "candidate %d%s\n",
 578            cand->id, cand->important ? " (important)" : "");
 579
 580   if (cand->depends_on)
 581     {
 582       fprintf (file, "  depends on ");
 583       dump_bitmap (file, cand->depends_on);
 584     }
 585
 586   if (!iv)
 587     {
 588       fprintf (file, "  final value replacement\n");
 589       return;
 590     }
 591
 592   if (cand->var_before)
 593     {
 594       fprintf (file, "  var_before ");
 595       print_generic_expr (file, cand->var_before, TDF_SLIM);
 596       fprintf (file, "\n");
 597     }
 598   if (cand->var_after)
 599     {
 600       fprintf (file, "  var_after ");
 601       print_generic_expr (file, cand->var_after, TDF_SLIM);
 602       fprintf (file, "\n");
 603     }
 604
 605   switch (cand->pos)
 606     {
 607     case IP_NORMAL:
 608       fprintf (file, "  incremented before exit test\n");
 609       break;
 610
 611     case IP_BEFORE_USE:
 612       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 613       break;
 614
 615     case IP_AFTER_USE:
 616       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 617       break;
 618
 619     case IP_END:
 620       fprintf (file, "  incremented at end\n");
 621       break;
 622
 623     case IP_ORIGINAL:
 624       fprintf (file, "  original biv\n");
 625       break;
 626     }
 627
 628   dump_iv (file, iv);
 629 }
 630
 631 /* Returns the info for ssa version VER.  */
 632
 633 static inline struct version_info *
 634 ver_info (struct ivopts_data *data, unsigned ver)
 635 {
 636   return data->version_info + ver;
 637 }
 638
 639 /* Returns the info for ssa name NAME.  */
 640
 641 static inline struct version_info *
 642 name_info (struct ivopts_data *data, tree name)
 643 {
 644   return ver_info (data, SSA_NAME_VERSION (name));
 645 }
 646
 647 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 648    emitted in LOOP.  */
 649
 650 static bool
 651 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 652 {
 653   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 654
 655   gcc_assert (bb);
 656
 657   if (sbb == loop->latch)
 658     return true;
 659
 660   if (sbb != bb)
 661     return false;
 662
 663   return stmt == last_stmt (bb);
 664 }
 665
 666 /* Returns true if STMT if after the place where the original induction
 667    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 668    if the positions are identical.  */
 669
 670 static bool
 671 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 672 {
 673   basic_block cand_bb = gimple_bb (cand->incremented_at);
 674   basic_block stmt_bb = gimple_bb (stmt);
 675
 676   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 677     return false;
 678
 679   if (stmt_bb != cand_bb)
 680     return true;
 681
 682   if (true_if_equal
 683       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 684     return true;
 685   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 686 }
 687
 688 /* Returns true if STMT if after the place where the induction variable
 689    CAND is incremented in LOOP.  */
 690
 691 static bool
 692 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 693 {
 694   switch (cand->pos)
 695     {
 696     case IP_END:
 697       return false;
 698
 699     case IP_NORMAL:
 700       return stmt_after_ip_normal_pos (loop, stmt);
 701
 702     case IP_ORIGINAL:
 703     case IP_AFTER_USE:
 704       return stmt_after_inc_pos (cand, stmt, false);
 705
 706     case IP_BEFORE_USE:
 707       return stmt_after_inc_pos (cand, stmt, true);
 708
 709     default:
 710       gcc_unreachable ();
 711     }
 712 }
 713
 714 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 715
 716 static bool
 717 abnormal_ssa_name_p (tree exp)
 718 {
 719   if (!exp)
 720     return false;
 721
 722   if (TREE_CODE (exp) != SSA_NAME)
 723     return false;
 724
 725   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 726 }
 727
 728 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 729    abnormal phi node.  Callback for for_each_index.  */
 730
 731 static bool
 732 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 733                                   void *data ATTRIBUTE_UNUSED)
 734 {
 735   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 736     {
 737       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 738         return false;
 739       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 740         return false;
 741     }
 742
 743   return !abnormal_ssa_name_p (*index);
 744 }
 745
 746 /* Returns true if EXPR contains a ssa name that occurs in an
 747    abnormal phi node.  */
 748
 749 bool
 750 contains_abnormal_ssa_name_p (tree expr)
 751 {
 752   enum tree_code code;
 753   enum tree_code_class codeclass;
 754
 755   if (!expr)
 756     return false;
 757
 758   code = TREE_CODE (expr);
 759   codeclass = TREE_CODE_CLASS (code);
 760
 761   if (code == SSA_NAME)
 762     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 763
 764   if (code == INTEGER_CST
 765       || is_gimple_min_invariant (expr))
 766     return false;
 767
 768   if (code == ADDR_EXPR)
 769     return !for_each_index (&TREE_OPERAND (expr, 0),
 770                             idx_contains_abnormal_ssa_name_p,
 771                             NULL);
 772
 773   if (code == COND_EXPR)
 774     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 775       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 776       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 777
 778   switch (codeclass)
 779     {
 780     case tcc_binary:
 781     case tcc_comparison:
 782       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 783         return true;
 784
 785       /* Fallthru.  */
 786     case tcc_unary:
 787       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 788         return true;
 789
 790       break;
 791
 792     default:
 793       gcc_unreachable ();
 794     }
 795
 796   return false;
 797 }
 798
 799 /*  Returns the structure describing number of iterations determined from
 800     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 801
 802 static struct tree_niter_desc *
 803 niter_for_exit (struct ivopts_data *data, edge exit)
 804 {
 805   struct tree_niter_desc *desc;
 806   void **slot;
 807
 808   if (!data->niters)
 809     {
 810       data->niters = pointer_map_create ();
 811       slot = NULL;
 812     }
 813   else
 814     slot = pointer_map_contains (data->niters, exit);
 815
 816   if (!slot)
 817     {
 818       /* Try to determine number of iterations.  We cannot safely work with ssa
 819          names that appear in phi nodes on abnormal edges, so that we do not
 820          create overlapping life ranges for them (PR 27283).  */
 821       desc = XNEW (struct tree_niter_desc);
 822       if (!number_of_iterations_exit (data->current_loop,
 823                                       exit, desc, true)
 824           || contains_abnormal_ssa_name_p (desc->niter))
 825         {
 826           XDELETE (desc);
 827           desc = NULL;
 828         }
 829       slot = pointer_map_insert (data->niters, exit);
 830       *slot = desc;
 831     }
 832   else
 833     desc = (struct tree_niter_desc *) *slot;
 834
 835   return desc;
 836 }
 837
 838 /* Returns the structure describing number of iterations determined from
 839    single dominating exit of DATA->current_loop, or NULL if something
 840    goes wrong.  */
 841
 842 static struct tree_niter_desc *
 843 niter_for_single_dom_exit (struct ivopts_data *data)
 844 {
 845   edge exit = single_dom_exit (data->current_loop);
 846
 847   if (!exit)
 848     return NULL;
 849
 850   return niter_for_exit (data, exit);
 851 }
 852
 853 /* Initializes data structures used by the iv optimization pass, stored
 854    in DATA.  */
 855
 856 static void
 857 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 858 {
 859   data->version_info_size = 2 * num_ssa_names;
 860   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 861   data->relevant = BITMAP_ALLOC (NULL);
 862   data->important_candidates = BITMAP_ALLOC (NULL);
 863   data->max_inv_id = 0;
 864   data->niters = NULL;
 865   data->iv_uses.create (20);
 866   data->iv_candidates.create (20);
 867   data->inv_expr_tab.create (10);
 868   data->inv_expr_id = 0;
 869   decl_rtl_to_reset.create (20);
 870 }
 871
 872 /* Returns a memory object to that EXPR points.  In case we are able to
 873    determine that it does not point to any such object, NULL is returned.  */
 874
 875 static tree
 876 determine_base_object (tree expr)
 877 {
 878   enum tree_code code = TREE_CODE (expr);
 879   tree base, obj;
 880
 881   /* If this is a pointer casted to any type, we need to determine
 882      the base object for the pointer; so handle conversions before
 883      throwing away non-pointer expressions.  */
 884   if (CONVERT_EXPR_P (expr))
 885     return determine_base_object (TREE_OPERAND (expr, 0));
 886
 887   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 888     return NULL_TREE;
 889
 890   switch (code)
 891     {
 892     case INTEGER_CST:
 893       return NULL_TREE;
 894
 895     case ADDR_EXPR:
 896       obj = TREE_OPERAND (expr, 0);
 897       base = get_base_address (obj);
 898
 899       if (!base)
 900         return expr;
 901
 902       if (TREE_CODE (base) == MEM_REF)
 903         return determine_base_object (TREE_OPERAND (base, 0));
 904
 905       return fold_convert (ptr_type_node,
 906                            build_fold_addr_expr (base));
 907
 908     case POINTER_PLUS_EXPR:
 909       return determine_base_object (TREE_OPERAND (expr, 0));
 910
 911     case PLUS_EXPR:
 912     case MINUS_EXPR:
 913       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 914       gcc_unreachable ();
 915
 916     default:
 917       return fold_convert (ptr_type_node, expr);
 918     }
 919 }
 920
 921 /* Allocates an induction variable with given initial value BASE and step STEP
 922    for loop LOOP.  */
 923
 924 static struct iv *
 925 alloc_iv (tree base, tree step)
 926 {
 927   struct iv *iv = XCNEW (struct iv);
 928   gcc_assert (step != NULL_TREE);
 929
 930   iv->base = base;
 931   iv->base_object = determine_base_object (base);
 932   iv->step = step;
 933   iv->biv_p = false;
 934   iv->have_use_for = false;
 935   iv->use_id = 0;
 936   iv->ssa_name = NULL_TREE;
 937
 938   return iv;
 939 }
 940
 941 /* Sets STEP and BASE for induction variable IV.  */
 942
 943 static void
 944 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 945 {
 946   struct version_info *info = name_info (data, iv);
 947
 948   gcc_assert (!info->iv);
 949
 950   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 951   info->iv = alloc_iv (base, step);
 952   info->iv->ssa_name = iv;
 953 }
 954
 955 /* Finds induction variable declaration for VAR.  */
 956
 957 static struct iv *
 958 get_iv (struct ivopts_data *data, tree var)
 959 {
 960   basic_block bb;
 961   tree type = TREE_TYPE (var);
 962
 963   if (!POINTER_TYPE_P (type)
 964       && !INTEGRAL_TYPE_P (type))
 965     return NULL;
 966
 967   if (!name_info (data, var)->iv)
 968     {
 969       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 970
 971       if (!bb
 972           || !flow_bb_inside_loop_p (data->current_loop, bb))
 973         set_iv (data, var, var, build_int_cst (type, 0));
 974     }
 975
 976   return name_info (data, var)->iv;
 977 }
 978
 979 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 980    not define a simple affine biv with nonzero step.  */
 981
 982 static tree
 983 determine_biv_step (gimple phi)
 984 {
 985   struct loop *loop = gimple_bb (phi)->loop_father;
 986   tree name = PHI_RESULT (phi);
 987   affine_iv iv;
 988
 989   if (virtual_operand_p (name))
 990     return NULL_TREE;
 991
 992   if (!simple_iv (loop, loop, name, &iv, true))
 993     return NULL_TREE;
 994
 995   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 996 }
 997
 998 /* Finds basic ivs.  */
 999
1000 static bool
1001 find_bivs (struct ivopts_data *data)
1002 {
1003   gimple phi;
1004   tree step, type, base;
1005   bool found = false;
1006   struct loop *loop = data->current_loop;
1007   gimple_stmt_iterator psi;
1008
1009   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1010     {
1011       phi = gsi_stmt (psi);
1012
1013       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1014         continue;
1015
1016       step = determine_biv_step (phi);
1017       if (!step)
1018         continue;
1019
1020       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1021       base = expand_simple_operations (base);
1022       if (contains_abnormal_ssa_name_p (base)
1023           || contains_abnormal_ssa_name_p (step))
1024         continue;
1025
1026       type = TREE_TYPE (PHI_RESULT (phi));
1027       base = fold_convert (type, base);
1028       if (step)
1029         {
1030           if (POINTER_TYPE_P (type))
1031             step = convert_to_ptrofftype (step);
1032           else
1033             step = fold_convert (type, step);
1034         }
1035
1036       set_iv (data, PHI_RESULT (phi), base, step);
1037       found = true;
1038     }
1039
1040   return found;
1041 }
1042
1043 /* Marks basic ivs.  */
1044
1045 static void
1046 mark_bivs (struct ivopts_data *data)
1047 {
1048   gimple phi;
1049   tree var;
1050   struct iv *iv, *incr_iv;
1051   struct loop *loop = data->current_loop;
1052   basic_block incr_bb;
1053   gimple_stmt_iterator psi;
1054
1055   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1056     {
1057       phi = gsi_stmt (psi);
1058
1059       iv = get_iv (data, PHI_RESULT (phi));
1060       if (!iv)
1061         continue;
1062
1063       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1064       incr_iv = get_iv (data, var);
1065       if (!incr_iv)
1066         continue;
1067
1068       /* If the increment is in the subloop, ignore it.  */
1069       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1070       if (incr_bb->loop_father != data->current_loop
1071           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1072         continue;
1073
1074       iv->biv_p = true;
1075       incr_iv->biv_p = true;
1076     }
1077 }
1078
1079 /* Checks whether STMT defines a linear induction variable and stores its
1080    parameters to IV.  */
1081
1082 static bool
1083 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1084 {
1085   tree lhs;
1086   struct loop *loop = data->current_loop;
1087
1088   iv->base = NULL_TREE;
1089   iv->step = NULL_TREE;
1090
1091   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1092     return false;
1093
1094   lhs = gimple_assign_lhs (stmt);
1095   if (TREE_CODE (lhs) != SSA_NAME)
1096     return false;
1097
1098   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1099     return false;
1100   iv->base = expand_simple_operations (iv->base);
1101
1102   if (contains_abnormal_ssa_name_p (iv->base)
1103       || contains_abnormal_ssa_name_p (iv->step))
1104     return false;
1105
1106   /* If STMT could throw, then do not consider STMT as defining a GIV.
1107      While this will suppress optimizations, we can not safely delete this
1108      GIV and associated statements, even if it appears it is not used.  */
1109   if (stmt_could_throw_p (stmt))
1110     return false;
1111
1112   return true;
1113 }
1114
1115 /* Finds general ivs in statement STMT.  */
1116
1117 static void
1118 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1119 {
1120   affine_iv iv;
1121
1122   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1123     return;
1124
1125   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1126 }
1127
1128 /* Finds general ivs in basic block BB.  */
1129
1130 static void
1131 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1132 {
1133   gimple_stmt_iterator bsi;
1134
1135   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1136     find_givs_in_stmt (data, gsi_stmt (bsi));
1137 }
1138
1139 /* Finds general ivs.  */
1140
1141 static void
1142 find_givs (struct ivopts_data *data)
1143 {
1144   struct loop *loop = data->current_loop;
1145   basic_block *body = get_loop_body_in_dom_order (loop);
1146   unsigned i;
1147
1148   for (i = 0; i < loop->num_nodes; i++)
1149     find_givs_in_bb (data, body[i]);
1150   free (body);
1151 }
1152
1153 /* For each ssa name defined in LOOP determines whether it is an induction
1154    variable and if so, its initial value and step.  */
1155
1156 static bool
1157 find_induction_variables (struct ivopts_data *data)
1158 {
1159   unsigned i;
1160   bitmap_iterator bi;
1161
1162   if (!find_bivs (data))
1163     return false;
1164
1165   find_givs (data);
1166   mark_bivs (data);
1167
1168   if (dump_file && (dump_flags & TDF_DETAILS))
1169     {
1170       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1171
1172       if (niter)
1173         {
1174           fprintf (dump_file, "  number of iterations ");
1175           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1176           if (!integer_zerop (niter->may_be_zero))
1177             {
1178               fprintf (dump_file, "; zero if ");
1179               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1180             }
1181           fprintf (dump_file, "\n\n");
1182         };
1183
1184       fprintf (dump_file, "Induction variables:\n\n");
1185
1186       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1187         {
1188           if (ver_info (data, i)->iv)
1189             dump_iv (dump_file, ver_info (data, i)->iv);
1190         }
1191     }
1192
1193   return true;
1194 }
1195
1196 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1197
1198 static struct iv_use *
1199 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1200             gimple stmt, enum use_type use_type)
1201 {
1202   struct iv_use *use = XCNEW (struct iv_use);
1203
1204   use->id = n_iv_uses (data);
1205   use->type = use_type;
1206   use->iv = iv;
1207   use->stmt = stmt;
1208   use->op_p = use_p;
1209   use->related_cands = BITMAP_ALLOC (NULL);
1210
1211   /* To avoid showing ssa name in the dumps, if it was not reset by the
1212      caller.  */
1213   iv->ssa_name = NULL_TREE;
1214
1215   if (dump_file && (dump_flags & TDF_DETAILS))
1216     dump_use (dump_file, use);
1217
1218   data->iv_uses.safe_push (use);
1219
1220   return use;
1221 }
1222
1223 /* Checks whether OP is a loop-level invariant and if so, records it.
1224    NONLINEAR_USE is true if the invariant is used in a way we do not
1225    handle specially.  */
1226
1227 static void
1228 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1229 {
1230   basic_block bb;
1231   struct version_info *info;
1232
1233   if (TREE_CODE (op) != SSA_NAME
1234       || virtual_operand_p (op))
1235     return;
1236
1237   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1238   if (bb
1239       && flow_bb_inside_loop_p (data->current_loop, bb))
1240     return;
1241
1242   info = name_info (data, op);
1243   info->name = op;
1244   info->has_nonlin_use |= nonlinear_use;
1245   if (!info->inv_id)
1246     info->inv_id = ++data->max_inv_id;
1247   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1248 }
1249
1250 /* Checks whether the use OP is interesting and if so, records it.  */
1251
1252 static struct iv_use *
1253 find_interesting_uses_op (struct ivopts_data *data, tree op)
1254 {
1255   struct iv *iv;
1256   struct iv *civ;
1257   gimple stmt;
1258   struct iv_use *use;
1259
1260   if (TREE_CODE (op) != SSA_NAME)
1261     return NULL;
1262
1263   iv = get_iv (data, op);
1264   if (!iv)
1265     return NULL;
1266
1267   if (iv->have_use_for)
1268     {
1269       use = iv_use (data, iv->use_id);
1270
1271       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1272       return use;
1273     }
1274
1275   if (integer_zerop (iv->step))
1276     {
1277       record_invariant (data, op, true);
1278       return NULL;
1279     }
1280   iv->have_use_for = true;
1281
1282   civ = XNEW (struct iv);
1283   *civ = *iv;
1284
1285   stmt = SSA_NAME_DEF_STMT (op);
1286   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1287               || is_gimple_assign (stmt));
1288
1289   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1290   iv->use_id = use->id;
1291
1292   return use;
1293 }
1294
1295 /* Given a condition in statement STMT, checks whether it is a compare
1296    of an induction variable and an invariant.  If this is the case,
1297    CONTROL_VAR is set to location of the iv, BOUND to the location of
1298    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1299    induction variable descriptions, and true is returned.  If this is not
1300    the case, CONTROL_VAR and BOUND are set to the arguments of the
1301    condition and false is returned.  */
1302
1303 static bool
1304 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1305                        tree **control_var, tree **bound,
1306                        struct iv **iv_var, struct iv **iv_bound)
1307 {
1308   /* The objects returned when COND has constant operands.  */
1309   static struct iv const_iv;
1310   static tree zero;
1311   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1312   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1313   bool ret = false;
1314
1315   if (gimple_code (stmt) == GIMPLE_COND)
1316     {
1317       op0 = gimple_cond_lhs_ptr (stmt);
1318       op1 = gimple_cond_rhs_ptr (stmt);
1319     }
1320   else
1321     {
1322       op0 = gimple_assign_rhs1_ptr (stmt);
1323       op1 = gimple_assign_rhs2_ptr (stmt);
1324     }
1325
1326   zero = integer_zero_node;
1327   const_iv.step = integer_zero_node;
1328
1329   if (TREE_CODE (*op0) == SSA_NAME)
1330     iv0 = get_iv (data, *op0);
1331   if (TREE_CODE (*op1) == SSA_NAME)
1332     iv1 = get_iv (data, *op1);
1333
1334   /* Exactly one of the compared values must be an iv, and the other one must
1335      be an invariant.  */
1336   if (!iv0 || !iv1)
1337     goto end;
1338
1339   if (integer_zerop (iv0->step))
1340     {
1341       /* Control variable may be on the other side.  */
1342       tmp_op = op0; op0 = op1; op1 = tmp_op;
1343       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1344     }
1345   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1346
1347 end:
1348   if (control_var)
1349     *control_var = op0;;
1350   if (iv_var)
1351     *iv_var = iv0;;
1352   if (bound)
1353     *bound = op1;
1354   if (iv_bound)
1355     *iv_bound = iv1;
1356
1357   return ret;
1358 }
1359
1360 /* Checks whether the condition in STMT is interesting and if so,
1361    records it.  */
1362
1363 static void
1364 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1365 {
1366   tree *var_p, *bound_p;
1367   struct iv *var_iv, *civ;
1368
1369   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1370     {
1371       find_interesting_uses_op (data, *var_p);
1372       find_interesting_uses_op (data, *bound_p);
1373       return;
1374     }
1375
1376   civ = XNEW (struct iv);
1377   *civ = *var_iv;
1378   record_use (data, NULL, civ, stmt, USE_COMPARE);
1379 }
1380
1381 /* Returns the outermost loop EXPR is obviously invariant in
1382    relative to the loop LOOP, i.e. if all its operands are defined
1383    outside of the returned loop.  Returns NULL if EXPR is not
1384    even obviously invariant in LOOP.  */
1385
1386 struct loop *
1387 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1388 {
1389   basic_block def_bb;
1390   unsigned i, len;
1391
1392   if (is_gimple_min_invariant (expr))
1393     return current_loops->tree_root;
1394
1395   if (TREE_CODE (expr) == SSA_NAME)
1396     {
1397       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1398       if (def_bb)
1399         {
1400           if (flow_bb_inside_loop_p (loop, def_bb))
1401             return NULL;
1402           return superloop_at_depth (loop,
1403                                      loop_depth (def_bb->loop_father) + 1);
1404         }
1405
1406       return current_loops->tree_root;
1407     }
1408
1409   if (!EXPR_P (expr))
1410     return NULL;
1411
1412   unsigned maxdepth = 0;
1413   len = TREE_OPERAND_LENGTH (expr);
1414   for (i = 0; i < len; i++)
1415     {
1416       struct loop *ivloop;
1417       if (!TREE_OPERAND (expr, i))
1418         continue;
1419
1420       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1421       if (!ivloop)
1422         return NULL;
1423       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1424     }
1425
1426   return superloop_at_depth (loop, maxdepth);
1427 }
1428
1429 /* Returns true if expression EXPR is obviously invariant in LOOP,
1430    i.e. if all its operands are defined outside of the LOOP.  LOOP
1431    should not be the function body.  */
1432
1433 bool
1434 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1435 {
1436   basic_block def_bb;
1437   unsigned i, len;
1438
1439   gcc_assert (loop_depth (loop) > 0);
1440
1441   if (is_gimple_min_invariant (expr))
1442     return true;
1443
1444   if (TREE_CODE (expr) == SSA_NAME)
1445     {
1446       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1447       if (def_bb
1448           && flow_bb_inside_loop_p (loop, def_bb))
1449         return false;
1450
1451       return true;
1452     }
1453
1454   if (!EXPR_P (expr))
1455     return false;
1456
1457   len = TREE_OPERAND_LENGTH (expr);
1458   for (i = 0; i < len; i++)
1459     if (TREE_OPERAND (expr, i)
1460         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1461       return false;
1462
1463   return true;
1464 }
1465
1466 /* Cumulates the steps of indices into DATA and replaces their values with the
1467    initial ones.  Returns false when the value of the index cannot be determined.
1468    Callback for for_each_index.  */
1469
1470 struct ifs_ivopts_data
1471 {
1472   struct ivopts_data *ivopts_data;
1473   gimple stmt;
1474   tree step;
1475 };
1476
1477 static bool
1478 idx_find_step (tree base, tree *idx, void *data)
1479 {
1480   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1481   struct iv *iv;
1482   tree step, iv_base, iv_step, lbound, off;
1483   struct loop *loop = dta->ivopts_data->current_loop;
1484
1485   /* If base is a component ref, require that the offset of the reference
1486      be invariant.  */
1487   if (TREE_CODE (base) == COMPONENT_REF)
1488     {
1489       off = component_ref_field_offset (base);
1490       return expr_invariant_in_loop_p (loop, off);
1491     }
1492
1493   /* If base is array, first check whether we will be able to move the
1494      reference out of the loop (in order to take its address in strength
1495      reduction).  In order for this to work we need both lower bound
1496      and step to be loop invariants.  */
1497   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1498     {
1499       /* Moreover, for a range, the size needs to be invariant as well.  */
1500       if (TREE_CODE (base) == ARRAY_RANGE_REF
1501           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1502         return false;
1503
1504       step = array_ref_element_size (base);
1505       lbound = array_ref_low_bound (base);
1506
1507       if (!expr_invariant_in_loop_p (loop, step)
1508           || !expr_invariant_in_loop_p (loop, lbound))
1509         return false;
1510     }
1511
1512   if (TREE_CODE (*idx) != SSA_NAME)
1513     return true;
1514
1515   iv = get_iv (dta->ivopts_data, *idx);
1516   if (!iv)
1517     return false;
1518
1519   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1520           *&x[0], which is not folded and does not trigger the
1521           ARRAY_REF path below.  */
1522   *idx = iv->base;
1523
1524   if (integer_zerop (iv->step))
1525     return true;
1526
1527   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1528     {
1529       step = array_ref_element_size (base);
1530
1531       /* We only handle addresses whose step is an integer constant.  */
1532       if (TREE_CODE (step) != INTEGER_CST)
1533         return false;
1534     }
1535   else
1536     /* The step for pointer arithmetics already is 1 byte.  */
1537     step = size_one_node;
1538
1539   iv_base = iv->base;
1540   iv_step = iv->step;
1541   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1542                             sizetype, &iv_base, &iv_step, dta->stmt,
1543                             false))
1544     {
1545       /* The index might wrap.  */
1546       return false;
1547     }
1548
1549   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1550   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1551
1552   return true;
1553 }
1554
1555 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1556    object is passed to it in DATA.  */
1557
1558 static bool
1559 idx_record_use (tree base, tree *idx,
1560                 void *vdata)
1561 {
1562   struct ivopts_data *data = (struct ivopts_data *) vdata;
1563   find_interesting_uses_op (data, *idx);
1564   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1565     {
1566       find_interesting_uses_op (data, array_ref_element_size (base));
1567       find_interesting_uses_op (data, array_ref_low_bound (base));
1568     }
1569   return true;
1570 }
1571
1572 /* If we can prove that TOP = cst * BOT for some constant cst,
1573    store cst to MUL and return true.  Otherwise return false.
1574    The returned value is always sign-extended, regardless of the
1575    signedness of TOP and BOT.  */
1576
1577 static bool
1578 constant_multiple_of (tree top, tree bot, double_int *mul)
1579 {
1580   tree mby;
1581   enum tree_code code;
1582   double_int res, p0, p1;
1583   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1584
1585   STRIP_NOPS (top);
1586   STRIP_NOPS (bot);
1587
1588   if (operand_equal_p (top, bot, 0))
1589     {
1590       *mul = double_int_one;
1591       return true;
1592     }
1593
1594   code = TREE_CODE (top);
1595   switch (code)
1596     {
1597     case MULT_EXPR:
1598       mby = TREE_OPERAND (top, 1);
1599       if (TREE_CODE (mby) != INTEGER_CST)
1600         return false;
1601
1602       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1603         return false;
1604
1605       *mul = (res * tree_to_double_int (mby)).sext (precision);
1606       return true;
1607
1608     case PLUS_EXPR:
1609     case MINUS_EXPR:
1610       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1611           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1612         return false;
1613
1614       if (code == MINUS_EXPR)
1615         p1 = -p1;
1616       *mul = (p0 + p1).sext (precision);
1617       return true;
1618
1619     case INTEGER_CST:
1620       if (TREE_CODE (bot) != INTEGER_CST)
1621         return false;
1622
1623       p0 = tree_to_double_int (top).sext (precision);
1624       p1 = tree_to_double_int (bot).sext (precision);
1625       if (p1.is_zero ())
1626         return false;
1627       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1628       return res.is_zero ();
1629
1630     default:
1631       return false;
1632     }
1633 }
1634
1635 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1636
1637 static bool
1638 may_be_unaligned_p (tree ref, tree step)
1639 {
1640   tree base;
1641   tree base_type;
1642   HOST_WIDE_INT bitsize;
1643   HOST_WIDE_INT bitpos;
1644   tree toffset;
1645   enum machine_mode mode;
1646   int unsignedp, volatilep;
1647   unsigned base_align;
1648
1649   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1650      thus they are not misaligned.  */
1651   if (TREE_CODE (ref) == TARGET_MEM_REF)
1652     return false;
1653
1654   /* The test below is basically copy of what expr.c:normal_inner_ref
1655      does to check whether the object must be loaded by parts when
1656      STRICT_ALIGNMENT is true.  */
1657   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1658                               &unsignedp, &volatilep, true);
1659   base_type = TREE_TYPE (base);
1660   base_align = get_object_alignment (base);
1661   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1662
1663   if (mode != BLKmode)
1664     {
1665       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1666
1667       if (base_align < mode_align
1668           || (bitpos % mode_align) != 0
1669           || (bitpos % BITS_PER_UNIT) != 0)
1670         return true;
1671
1672       if (toffset
1673           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1674         return true;
1675
1676       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1677         return true;
1678     }
1679
1680   return false;
1681 }
1682
1683 /* Return true if EXPR may be non-addressable.   */
1684
1685 bool
1686 may_be_nonaddressable_p (tree expr)
1687 {
1688   switch (TREE_CODE (expr))
1689     {
1690     case TARGET_MEM_REF:
1691       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1692          target, thus they are always addressable.  */
1693       return false;
1694
1695     case COMPONENT_REF:
1696       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1697              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1698
1699     case VIEW_CONVERT_EXPR:
1700       /* This kind of view-conversions may wrap non-addressable objects
1701          and make them look addressable.  After some processing the
1702          non-addressability may be uncovered again, causing ADDR_EXPRs
1703          of inappropriate objects to be built.  */
1704       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1705           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1706         return true;
1707
1708       /* ... fall through ... */
1709
1710     case ARRAY_REF:
1711     case ARRAY_RANGE_REF:
1712       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1713
1714     CASE_CONVERT:
1715       return true;
1716
1717     default:
1718       break;
1719     }
1720
1721   return false;
1722 }
1723
1724 /* Finds addresses in *OP_P inside STMT.  */
1725
1726 static void
1727 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1728 {
1729   tree base = *op_p, step = size_zero_node;
1730   struct iv *civ;
1731   struct ifs_ivopts_data ifs_ivopts_data;
1732
1733   /* Do not play with volatile memory references.  A bit too conservative,
1734      perhaps, but safe.  */
1735   if (gimple_has_volatile_ops (stmt))
1736     goto fail;
1737
1738   /* Ignore bitfields for now.  Not really something terribly complicated
1739      to handle.  TODO.  */
1740   if (TREE_CODE (base) == BIT_FIELD_REF)
1741     goto fail;
1742
1743   base = unshare_expr (base);
1744
1745   if (TREE_CODE (base) == TARGET_MEM_REF)
1746     {
1747       tree type = build_pointer_type (TREE_TYPE (base));
1748       tree astep;
1749
1750       if (TMR_BASE (base)
1751           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1752         {
1753           civ = get_iv (data, TMR_BASE (base));
1754           if (!civ)
1755             goto fail;
1756
1757           TMR_BASE (base) = civ->base;
1758           step = civ->step;
1759         }
1760       if (TMR_INDEX2 (base)
1761           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1762         {
1763           civ = get_iv (data, TMR_INDEX2 (base));
1764           if (!civ)
1765             goto fail;
1766
1767           TMR_INDEX2 (base) = civ->base;
1768           step = civ->step;
1769         }
1770       if (TMR_INDEX (base)
1771           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1772         {
1773           civ = get_iv (data, TMR_INDEX (base));
1774           if (!civ)
1775             goto fail;
1776
1777           TMR_INDEX (base) = civ->base;
1778           astep = civ->step;
1779
1780           if (astep)
1781             {
1782               if (TMR_STEP (base))
1783                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1784
1785               step = fold_build2 (PLUS_EXPR, type, step, astep);
1786             }
1787         }
1788
1789       if (integer_zerop (step))
1790         goto fail;
1791       base = tree_mem_ref_addr (type, base);
1792     }
1793   else
1794     {
1795       ifs_ivopts_data.ivopts_data = data;
1796       ifs_ivopts_data.stmt = stmt;
1797       ifs_ivopts_data.step = size_zero_node;
1798       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1799           || integer_zerop (ifs_ivopts_data.step))
1800         goto fail;
1801       step = ifs_ivopts_data.step;
1802
1803       /* Check that the base expression is addressable.  This needs
1804          to be done after substituting bases of IVs into it.  */
1805       if (may_be_nonaddressable_p (base))
1806         goto fail;
1807
1808       /* Moreover, on strict alignment platforms, check that it is
1809          sufficiently aligned.  */
1810       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1811         goto fail;
1812
1813       base = build_fold_addr_expr (base);
1814
1815       /* Substituting bases of IVs into the base expression might
1816          have caused folding opportunities.  */
1817       if (TREE_CODE (base) == ADDR_EXPR)
1818         {
1819           tree *ref = &TREE_OPERAND (base, 0);
1820           while (handled_component_p (*ref))
1821             ref = &TREE_OPERAND (*ref, 0);
1822           if (TREE_CODE (*ref) == MEM_REF)
1823             {
1824               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1825                                       TREE_OPERAND (*ref, 0),
1826                                       TREE_OPERAND (*ref, 1));
1827               if (tem)
1828                 *ref = tem;
1829             }
1830         }
1831     }
1832
1833   civ = alloc_iv (base, step);
1834   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1835   return;
1836
1837 fail:
1838   for_each_index (op_p, idx_record_use, data);
1839 }
1840
1841 /* Finds and records invariants used in STMT.  */
1842
1843 static void
1844 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1845 {
1846   ssa_op_iter iter;
1847   use_operand_p use_p;
1848   tree op;
1849
1850   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1851     {
1852       op = USE_FROM_PTR (use_p);
1853       record_invariant (data, op, false);
1854     }
1855 }
1856
1857 /* Finds interesting uses of induction variables in the statement STMT.  */
1858
1859 static void
1860 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1861 {
1862   struct iv *iv;
1863   tree op, *lhs, *rhs;
1864   ssa_op_iter iter;
1865   use_operand_p use_p;
1866   enum tree_code code;
1867
1868   find_invariants_stmt (data, stmt);
1869
1870   if (gimple_code (stmt) == GIMPLE_COND)
1871     {
1872       find_interesting_uses_cond (data, stmt);
1873       return;
1874     }
1875
1876   if (is_gimple_assign (stmt))
1877     {
1878       lhs = gimple_assign_lhs_ptr (stmt);
1879       rhs = gimple_assign_rhs1_ptr (stmt);
1880
1881       if (TREE_CODE (*lhs) == SSA_NAME)
1882         {
1883           /* If the statement defines an induction variable, the uses are not
1884              interesting by themselves.  */
1885
1886           iv = get_iv (data, *lhs);
1887
1888           if (iv && !integer_zerop (iv->step))
1889             return;
1890         }
1891
1892       code = gimple_assign_rhs_code (stmt);
1893       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1894           && (REFERENCE_CLASS_P (*rhs)
1895               || is_gimple_val (*rhs)))
1896         {
1897           if (REFERENCE_CLASS_P (*rhs))
1898             find_interesting_uses_address (data, stmt, rhs);
1899           else
1900             find_interesting_uses_op (data, *rhs);
1901
1902           if (REFERENCE_CLASS_P (*lhs))
1903             find_interesting_uses_address (data, stmt, lhs);
1904           return;
1905         }
1906       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1907         {
1908           find_interesting_uses_cond (data, stmt);
1909           return;
1910         }
1911
1912       /* TODO -- we should also handle address uses of type
1913
1914          memory = call (whatever);
1915
1916          and
1917
1918          call (memory).  */
1919     }
1920
1921   if (gimple_code (stmt) == GIMPLE_PHI
1922       && gimple_bb (stmt) == data->current_loop->header)
1923     {
1924       iv = get_iv (data, PHI_RESULT (stmt));
1925
1926       if (iv && !integer_zerop (iv->step))
1927         return;
1928     }
1929
1930   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1931     {
1932       op = USE_FROM_PTR (use_p);
1933
1934       if (TREE_CODE (op) != SSA_NAME)
1935         continue;
1936
1937       iv = get_iv (data, op);
1938       if (!iv)
1939         continue;
1940
1941       find_interesting_uses_op (data, op);
1942     }
1943 }
1944
1945 /* Finds interesting uses of induction variables outside of loops
1946    on loop exit edge EXIT.  */
1947
1948 static void
1949 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1950 {
1951   gimple phi;
1952   gimple_stmt_iterator psi;
1953   tree def;
1954
1955   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1956     {
1957       phi = gsi_stmt (psi);
1958       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1959       if (!virtual_operand_p (def))
1960         find_interesting_uses_op (data, def);
1961     }
1962 }
1963
1964 /* Finds uses of the induction variables that are interesting.  */
1965
1966 static void
1967 find_interesting_uses (struct ivopts_data *data)
1968 {
1969   basic_block bb;
1970   gimple_stmt_iterator bsi;
1971   basic_block *body = get_loop_body (data->current_loop);
1972   unsigned i;
1973   struct version_info *info;
1974   edge e;
1975
1976   if (dump_file && (dump_flags & TDF_DETAILS))
1977     fprintf (dump_file, "Uses:\n\n");
1978
1979   for (i = 0; i < data->current_loop->num_nodes; i++)
1980     {
1981       edge_iterator ei;
1982       bb = body[i];
1983
1984       FOR_EACH_EDGE (e, ei, bb->succs)
1985         if (e->dest != EXIT_BLOCK_PTR
1986             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1987           find_interesting_uses_outside (data, e);
1988
1989       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1990         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1991       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1992         if (!is_gimple_debug (gsi_stmt (bsi)))
1993           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1994     }
1995
1996   if (dump_file && (dump_flags & TDF_DETAILS))
1997     {
1998       bitmap_iterator bi;
1999
2000       fprintf (dump_file, "\n");
2001
2002       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003         {
2004           info = ver_info (data, i);
2005           if (info->inv_id)
2006             {
2007               fprintf (dump_file, "  ");
2008               print_generic_expr (dump_file, info->name, TDF_SLIM);
2009               fprintf (dump_file, " is invariant (%d)%s\n",
2010                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2011             }
2012         }
2013
2014       fprintf (dump_file, "\n");
2015     }
2016
2017   free (body);
2018 }
2019
2020 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2021    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2022    we are at the top-level of the processed address.  */
2023
2024 static tree
2025 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2026                 HOST_WIDE_INT *offset)
2027 {
2028   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2029   enum tree_code code;
2030   tree type, orig_type = TREE_TYPE (expr);
2031   HOST_WIDE_INT off0, off1, st;
2032   tree orig_expr = expr;
2033
2034   STRIP_NOPS (expr);
2035
2036   type = TREE_TYPE (expr);
2037   code = TREE_CODE (expr);
2038   *offset = 0;
2039
2040   switch (code)
2041     {
2042     case INTEGER_CST:
2043       if (!cst_and_fits_in_hwi (expr)
2044           || integer_zerop (expr))
2045         return orig_expr;
2046
2047       *offset = int_cst_value (expr);
2048       return build_int_cst (orig_type, 0);
2049
2050     case POINTER_PLUS_EXPR:
2051     case PLUS_EXPR:
2052     case MINUS_EXPR:
2053       op0 = TREE_OPERAND (expr, 0);
2054       op1 = TREE_OPERAND (expr, 1);
2055
2056       op0 = strip_offset_1 (op0, false, false, &off0);
2057       op1 = strip_offset_1 (op1, false, false, &off1);
2058
2059       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2060       if (op0 == TREE_OPERAND (expr, 0)
2061           && op1 == TREE_OPERAND (expr, 1))
2062         return orig_expr;
2063
2064       if (integer_zerop (op1))
2065         expr = op0;
2066       else if (integer_zerop (op0))
2067         {
2068           if (code == MINUS_EXPR)
2069             expr = fold_build1 (NEGATE_EXPR, type, op1);
2070           else
2071             expr = op1;
2072         }
2073       else
2074         expr = fold_build2 (code, type, op0, op1);
2075
2076       return fold_convert (orig_type, expr);
2077
2078     case MULT_EXPR:
2079       op1 = TREE_OPERAND (expr, 1);
2080       if (!cst_and_fits_in_hwi (op1))
2081         return orig_expr;
2082
2083       op0 = TREE_OPERAND (expr, 0);
2084       op0 = strip_offset_1 (op0, false, false, &off0);
2085       if (op0 == TREE_OPERAND (expr, 0))
2086         return orig_expr;
2087
2088       *offset = off0 * int_cst_value (op1);
2089       if (integer_zerop (op0))
2090         expr = op0;
2091       else
2092         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2093
2094       return fold_convert (orig_type, expr);
2095
2096     case ARRAY_REF:
2097     case ARRAY_RANGE_REF:
2098       if (!inside_addr)
2099         return orig_expr;
2100
2101       step = array_ref_element_size (expr);
2102       if (!cst_and_fits_in_hwi (step))
2103         break;
2104
2105       st = int_cst_value (step);
2106       op1 = TREE_OPERAND (expr, 1);
2107       op1 = strip_offset_1 (op1, false, false, &off1);
2108       *offset = off1 * st;
2109
2110       if (top_compref
2111           && integer_zerop (op1))
2112         {
2113           /* Strip the component reference completely.  */
2114           op0 = TREE_OPERAND (expr, 0);
2115           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2116           *offset += off0;
2117           return op0;
2118         }
2119       break;
2120
2121     case COMPONENT_REF:
2122       {
2123         tree field;
2124
2125         if (!inside_addr)
2126           return orig_expr;
2127
2128         tmp = component_ref_field_offset (expr);
2129         field = TREE_OPERAND (expr, 1);
2130         if (top_compref
2131             && cst_and_fits_in_hwi (tmp)
2132             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2133           {
2134             HOST_WIDE_INT boffset, abs_off;
2135
2136             /* Strip the component reference completely.  */
2137             op0 = TREE_OPERAND (expr, 0);
2138             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2139             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2140             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2141             if (boffset < 0)
2142               abs_off = -abs_off;
2143
2144             *offset = off0 + int_cst_value (tmp) + abs_off;
2145             return op0;
2146           }
2147       }
2148       break;
2149
2150     case ADDR_EXPR:
2151       op0 = TREE_OPERAND (expr, 0);
2152       op0 = strip_offset_1 (op0, true, true, &off0);
2153       *offset += off0;
2154
2155       if (op0 == TREE_OPERAND (expr, 0))
2156         return orig_expr;
2157
2158       expr = build_fold_addr_expr (op0);
2159       return fold_convert (orig_type, expr);
2160
2161     case MEM_REF:
2162       /* ???  Offset operand?  */
2163       inside_addr = false;
2164       break;
2165
2166     default:
2167       return orig_expr;
2168     }
2169
2170   /* Default handling of expressions for that we want to recurse into
2171      the first operand.  */
2172   op0 = TREE_OPERAND (expr, 0);
2173   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2174   *offset += off0;
2175
2176   if (op0 == TREE_OPERAND (expr, 0)
2177       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2178     return orig_expr;
2179
2180   expr = copy_node (expr);
2181   TREE_OPERAND (expr, 0) = op0;
2182   if (op1)
2183     TREE_OPERAND (expr, 1) = op1;
2184
2185   /* Inside address, we might strip the top level component references,
2186      thus changing type of the expression.  Handling of ADDR_EXPR
2187      will fix that.  */
2188   expr = fold_convert (orig_type, expr);
2189
2190   return expr;
2191 }
2192
2193 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2194
2195 static tree
2196 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2197 {
2198   HOST_WIDE_INT off;
2199   tree core = strip_offset_1 (expr, false, false, &off);
2200   *offset = off;
2201   return core;
2202 }
2203
2204 /* Returns variant of TYPE that can be used as base for different uses.
2205    We return unsigned type with the same precision, which avoids problems
2206    with overflows.  */
2207
2208 static tree
2209 generic_type_for (tree type)
2210 {
2211   if (POINTER_TYPE_P (type))
2212     return unsigned_type_for (type);
2213
2214   if (TYPE_UNSIGNED (type))
2215     return type;
2216
2217   return unsigned_type_for (type);
2218 }
2219
2220 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2221    the bitmap to that we should store it.  */
2222
2223 static struct ivopts_data *fd_ivopts_data;
2224 static tree
2225 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2226 {
2227   bitmap *depends_on = (bitmap *) data;
2228   struct version_info *info;
2229
2230   if (TREE_CODE (*expr_p) != SSA_NAME)
2231     return NULL_TREE;
2232   info = name_info (fd_ivopts_data, *expr_p);
2233
2234   if (!info->inv_id || info->has_nonlin_use)
2235     return NULL_TREE;
2236
2237   if (!*depends_on)
2238     *depends_on = BITMAP_ALLOC (NULL);
2239   bitmap_set_bit (*depends_on, info->inv_id);
2240
2241   return NULL_TREE;
2242 }
2243
2244 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2245    position to POS.  If USE is not NULL, the candidate is set as related to
2246    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2247    replacement of the final value of the iv by a direct computation.  */
2248
2249 static struct iv_cand *
2250 add_candidate_1 (struct ivopts_data *data,
2251                  tree base, tree step, bool important, enum iv_position pos,
2252                  struct iv_use *use, gimple incremented_at)
2253 {
2254   unsigned i;
2255   struct iv_cand *cand = NULL;
2256   tree type, orig_type;
2257
2258   /* For non-original variables, make sure their values are computed in a type
2259      that does not invoke undefined behavior on overflows (since in general,
2260      we cannot prove that these induction variables are non-wrapping).  */
2261   if (pos != IP_ORIGINAL)
2262     {
2263       orig_type = TREE_TYPE (base);
2264       type = generic_type_for (orig_type);
2265       if (type != orig_type)
2266         {
2267           base = fold_convert (type, base);
2268           step = fold_convert (type, step);
2269         }
2270     }
2271
2272   for (i = 0; i < n_iv_cands (data); i++)
2273     {
2274       cand = iv_cand (data, i);
2275
2276       if (cand->pos != pos)
2277         continue;
2278
2279       if (cand->incremented_at != incremented_at
2280           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2281               && cand->ainc_use != use))
2282         continue;
2283
2284       if (!cand->iv)
2285         {
2286           if (!base && !step)
2287             break;
2288
2289           continue;
2290         }
2291
2292       if (!base && !step)
2293         continue;
2294
2295       if (operand_equal_p (base, cand->iv->base, 0)
2296           && operand_equal_p (step, cand->iv->step, 0)
2297           && (TYPE_PRECISION (TREE_TYPE (base))
2298               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2299         break;
2300     }
2301
2302   if (i == n_iv_cands (data))
2303     {
2304       cand = XCNEW (struct iv_cand);
2305       cand->id = i;
2306
2307       if (!base && !step)
2308         cand->iv = NULL;
2309       else
2310         cand->iv = alloc_iv (base, step);
2311
2312       cand->pos = pos;
2313       if (pos != IP_ORIGINAL && cand->iv)
2314         {
2315           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2316           cand->var_after = cand->var_before;
2317         }
2318       cand->important = important;
2319       cand->incremented_at = incremented_at;
2320       data->iv_candidates.safe_push (cand);
2321
2322       if (step
2323           && TREE_CODE (step) != INTEGER_CST)
2324         {
2325           fd_ivopts_data = data;
2326           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2327         }
2328
2329       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2330         cand->ainc_use = use;
2331       else
2332         cand->ainc_use = NULL;
2333
2334       if (dump_file && (dump_flags & TDF_DETAILS))
2335         dump_cand (dump_file, cand);
2336     }
2337
2338   if (important && !cand->important)
2339     {
2340       cand->important = true;
2341       if (dump_file && (dump_flags & TDF_DETAILS))
2342         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2343     }
2344
2345   if (use)
2346     {
2347       bitmap_set_bit (use->related_cands, i);
2348       if (dump_file && (dump_flags & TDF_DETAILS))
2349         fprintf (dump_file, "Candidate %d is related to use %d\n",
2350                  cand->id, use->id);
2351     }
2352
2353   return cand;
2354 }
2355
2356 /* Returns true if incrementing the induction variable at the end of the LOOP
2357    is allowed.
2358
2359    The purpose is to avoid splitting latch edge with a biv increment, thus
2360    creating a jump, possibly confusing other optimization passes and leaving
2361    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2362    is not available (so we do not have a better alternative), or if the latch
2363    edge is already nonempty.  */
2364
2365 static bool
2366 allow_ip_end_pos_p (struct loop *loop)
2367 {
2368   if (!ip_normal_pos (loop))
2369     return true;
2370
2371   if (!empty_block_p (ip_end_pos (loop)))
2372     return true;
2373
2374   return false;
2375 }
2376
2377 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2378    Important field is set to IMPORTANT.  */
2379
2380 static void
2381 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2382                         bool important, struct iv_use *use)
2383 {
2384   basic_block use_bb = gimple_bb (use->stmt);
2385   enum machine_mode mem_mode;
2386   unsigned HOST_WIDE_INT cstepi;
2387
2388   /* If we insert the increment in any position other than the standard
2389      ones, we must ensure that it is incremented once per iteration.
2390      It must not be in an inner nested loop, or one side of an if
2391      statement.  */
2392   if (use_bb->loop_father != data->current_loop
2393       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2394       || stmt_could_throw_p (use->stmt)
2395       || !cst_and_fits_in_hwi (step))
2396     return;
2397
2398   cstepi = int_cst_value (step);
2399
2400   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2401   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2402         || USE_STORE_PRE_INCREMENT (mem_mode))
2403        && GET_MODE_SIZE (mem_mode) == cstepi)
2404       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2405            || USE_STORE_PRE_DECREMENT (mem_mode))
2406           && GET_MODE_SIZE (mem_mode) == -cstepi))
2407     {
2408       enum tree_code code = MINUS_EXPR;
2409       tree new_base;
2410       tree new_step = step;
2411
2412       if (POINTER_TYPE_P (TREE_TYPE (base)))
2413         {
2414           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2415           code = POINTER_PLUS_EXPR;
2416         }
2417       else
2418         new_step = fold_convert (TREE_TYPE (base), new_step);
2419       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2420       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2421                        use->stmt);
2422     }
2423   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2424         || USE_STORE_POST_INCREMENT (mem_mode))
2425        && GET_MODE_SIZE (mem_mode) == cstepi)
2426       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2427            || USE_STORE_POST_DECREMENT (mem_mode))
2428           && GET_MODE_SIZE (mem_mode) == -cstepi))
2429     {
2430       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2431                        use->stmt);
2432     }
2433 }
2434
2435 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2436    position to POS.  If USE is not NULL, the candidate is set as related to
2437    it.  The candidate computation is scheduled on all available positions.  */
2438
2439 static void
2440 add_candidate (struct ivopts_data *data,
2441                tree base, tree step, bool important, struct iv_use *use)
2442 {
2443   if (ip_normal_pos (data->current_loop))
2444     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2445   if (ip_end_pos (data->current_loop)
2446       && allow_ip_end_pos_p (data->current_loop))
2447     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2448
2449   if (use != NULL && use->type == USE_ADDRESS)
2450     add_autoinc_candidates (data, base, step, important, use);
2451 }
2452
2453 /* Adds standard iv candidates.  */
2454
2455 static void
2456 add_standard_iv_candidates (struct ivopts_data *data)
2457 {
2458   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2459
2460   /* The same for a double-integer type if it is still fast enough.  */
2461   if (TYPE_PRECISION
2462         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2463       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2464     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2465                    build_int_cst (long_integer_type_node, 1), true, NULL);
2466
2467   /* The same for a double-integer type if it is still fast enough.  */
2468   if (TYPE_PRECISION
2469         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2470       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2471     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2472                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2473 }
2474
2475
2476 /* Adds candidates bases on the old induction variable IV.  */
2477
2478 static void
2479 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2480 {
2481   gimple phi;
2482   tree def;
2483   struct iv_cand *cand;
2484
2485   add_candidate (data, iv->base, iv->step, true, NULL);
2486
2487   /* The same, but with initial value zero.  */
2488   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2489     add_candidate (data, size_int (0), iv->step, true, NULL);
2490   else
2491     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2492                    iv->step, true, NULL);
2493
2494   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2495   if (gimple_code (phi) == GIMPLE_PHI)
2496     {
2497       /* Additionally record the possibility of leaving the original iv
2498          untouched.  */
2499       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2500       cand = add_candidate_1 (data,
2501                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2502                               SSA_NAME_DEF_STMT (def));
2503       cand->var_before = iv->ssa_name;
2504       cand->var_after = def;
2505     }
2506 }
2507
2508 /* Adds candidates based on the old induction variables.  */
2509
2510 static void
2511 add_old_ivs_candidates (struct ivopts_data *data)
2512 {
2513   unsigned i;
2514   struct iv *iv;
2515   bitmap_iterator bi;
2516
2517   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2518     {
2519       iv = ver_info (data, i)->iv;
2520       if (iv && iv->biv_p && !integer_zerop (iv->step))
2521         add_old_iv_candidates (data, iv);
2522     }
2523 }
2524
2525 /* Adds candidates based on the value of the induction variable IV and USE.  */
2526
2527 static void
2528 add_iv_value_candidates (struct ivopts_data *data,
2529                          struct iv *iv, struct iv_use *use)
2530 {
2531   unsigned HOST_WIDE_INT offset;
2532   tree base;
2533   tree basetype;
2534
2535   add_candidate (data, iv->base, iv->step, false, use);
2536
2537   /* The same, but with initial value zero.  Make such variable important,
2538      since it is generic enough so that possibly many uses may be based
2539      on it.  */
2540   basetype = TREE_TYPE (iv->base);
2541   if (POINTER_TYPE_P (basetype))
2542     basetype = sizetype;
2543   add_candidate (data, build_int_cst (basetype, 0),
2544                  iv->step, true, use);
2545
2546   /* Third, try removing the constant offset.  Make sure to even
2547      add a candidate for &a[0] vs. (T *)&a.  */
2548   base = strip_offset (iv->base, &offset);
2549   if (offset
2550       || base != iv->base)
2551     add_candidate (data, base, iv->step, false, use);
2552 }
2553
2554 /* Adds candidates based on the uses.  */
2555
2556 static void
2557 add_derived_ivs_candidates (struct ivopts_data *data)
2558 {
2559   unsigned i;
2560
2561   for (i = 0; i < n_iv_uses (data); i++)
2562     {
2563       struct iv_use *use = iv_use (data, i);
2564
2565       if (!use)
2566         continue;
2567
2568       switch (use->type)
2569         {
2570         case USE_NONLINEAR_EXPR:
2571         case USE_COMPARE:
2572         case USE_ADDRESS:
2573           /* Just add the ivs based on the value of the iv used here.  */
2574           add_iv_value_candidates (data, use->iv, use);
2575           break;
2576
2577         default:
2578           gcc_unreachable ();
2579         }
2580     }
2581 }
2582
2583 /* Record important candidates and add them to related_cands bitmaps
2584    if needed.  */
2585
2586 static void
2587 record_important_candidates (struct ivopts_data *data)
2588 {
2589   unsigned i;
2590   struct iv_use *use;
2591
2592   for (i = 0; i < n_iv_cands (data); i++)
2593     {
2594       struct iv_cand *cand = iv_cand (data, i);
2595
2596       if (cand->important)
2597         bitmap_set_bit (data->important_candidates, i);
2598     }
2599
2600   data->consider_all_candidates = (n_iv_cands (data)
2601                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2602
2603   if (data->consider_all_candidates)
2604     {
2605       /* We will not need "related_cands" bitmaps in this case,
2606          so release them to decrease peak memory consumption.  */
2607       for (i = 0; i < n_iv_uses (data); i++)
2608         {
2609           use = iv_use (data, i);
2610           BITMAP_FREE (use->related_cands);
2611         }
2612     }
2613   else
2614     {
2615       /* Add important candidates to the related_cands bitmaps.  */
2616       for (i = 0; i < n_iv_uses (data); i++)
2617         bitmap_ior_into (iv_use (data, i)->related_cands,
2618                          data->important_candidates);
2619     }
2620 }
2621
2622 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2623    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2624    we allocate a simple list to every use.  */
2625
2626 static void
2627 alloc_use_cost_map (struct ivopts_data *data)
2628 {
2629   unsigned i, size, s;
2630
2631   for (i = 0; i < n_iv_uses (data); i++)
2632     {
2633       struct iv_use *use = iv_use (data, i);
2634
2635       if (data->consider_all_candidates)
2636         size = n_iv_cands (data);
2637       else
2638         {
2639           s = bitmap_count_bits (use->related_cands);
2640
2641           /* Round up to the power of two, so that moduling by it is fast.  */
2642           size = s ? (1 << ceil_log2 (s)) : 1;
2643         }
2644
2645       use->n_map_members = size;
2646       use->cost_map = XCNEWVEC (struct cost_pair, size);
2647     }
2648 }
2649
2650 /* Returns description of computation cost of expression whose runtime
2651    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2652
2653 static comp_cost
2654 new_cost (unsigned runtime, unsigned complexity)
2655 {
2656   comp_cost cost;
2657
2658   cost.cost = runtime;
2659   cost.complexity = complexity;
2660
2661   return cost;
2662 }
2663
2664 /* Adds costs COST1 and COST2.  */
2665
2666 static comp_cost
2667 add_costs (comp_cost cost1, comp_cost cost2)
2668 {
2669   cost1.cost += cost2.cost;
2670   cost1.complexity += cost2.complexity;
2671
2672   return cost1;
2673 }
2674 /* Subtracts costs COST1 and COST2.  */
2675
2676 static comp_cost
2677 sub_costs (comp_cost cost1, comp_cost cost2)
2678 {
2679   cost1.cost -= cost2.cost;
2680   cost1.complexity -= cost2.complexity;
2681
2682   return cost1;
2683 }
2684
2685 /* Returns a negative number if COST1 < COST2, a positive number if
2686    COST1 > COST2, and 0 if COST1 = COST2.  */
2687
2688 static int
2689 compare_costs (comp_cost cost1, comp_cost cost2)
2690 {
2691   if (cost1.cost == cost2.cost)
2692     return cost1.complexity - cost2.complexity;
2693
2694   return cost1.cost - cost2.cost;
2695 }
2696
2697 /* Returns true if COST is infinite.  */
2698
2699 static bool
2700 infinite_cost_p (comp_cost cost)
2701 {
2702   return cost.cost == INFTY;
2703 }
2704
2705 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2706    on invariants DEPENDS_ON and that the value used in expressing it
2707    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2708
2709 static void
2710 set_use_iv_cost (struct ivopts_data *data,
2711                  struct iv_use *use, struct iv_cand *cand,
2712                  comp_cost cost, bitmap depends_on, tree value,
2713                  enum tree_code comp, int inv_expr_id)
2714 {
2715   unsigned i, s;
2716
2717   if (infinite_cost_p (cost))
2718     {
2719       BITMAP_FREE (depends_on);
2720       return;
2721     }
2722
2723   if (data->consider_all_candidates)
2724     {
2725       use->cost_map[cand->id].cand = cand;
2726       use->cost_map[cand->id].cost = cost;
2727       use->cost_map[cand->id].depends_on = depends_on;
2728       use->cost_map[cand->id].value = value;
2729       use->cost_map[cand->id].comp = comp;
2730       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2731       return;
2732     }
2733
2734   /* n_map_members is a power of two, so this computes modulo.  */
2735   s = cand->id & (use->n_map_members - 1);
2736   for (i = s; i < use->n_map_members; i++)
2737     if (!use->cost_map[i].cand)
2738       goto found;
2739   for (i = 0; i < s; i++)
2740     if (!use->cost_map[i].cand)
2741       goto found;
2742
2743   gcc_unreachable ();
2744
2745 found:
2746   use->cost_map[i].cand = cand;
2747   use->cost_map[i].cost = cost;
2748   use->cost_map[i].depends_on = depends_on;
2749   use->cost_map[i].value = value;
2750   use->cost_map[i].comp = comp;
2751   use->cost_map[i].inv_expr_id = inv_expr_id;
2752 }
2753
2754 /* Gets cost of (USE, CANDIDATE) pair.  */
2755
2756 static struct cost_pair *
2757 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2758                  struct iv_cand *cand)
2759 {
2760   unsigned i, s;
2761   struct cost_pair *ret;
2762
2763   if (!cand)
2764     return NULL;
2765
2766   if (data->consider_all_candidates)
2767     {
2768       ret = use->cost_map + cand->id;
2769       if (!ret->cand)
2770         return NULL;
2771
2772       return ret;
2773     }
2774
2775   /* n_map_members is a power of two, so this computes modulo.  */
2776   s = cand->id & (use->n_map_members - 1);
2777   for (i = s; i < use->n_map_members; i++)
2778     if (use->cost_map[i].cand == cand)
2779       return use->cost_map + i;
2780     else if (use->cost_map[i].cand == NULL)
2781       return NULL;
2782   for (i = 0; i < s; i++)
2783     if (use->cost_map[i].cand == cand)
2784       return use->cost_map + i;
2785     else if (use->cost_map[i].cand == NULL)
2786       return NULL;
2787
2788   return NULL;
2789 }
2790
2791 /* Returns estimate on cost of computing SEQ.  */
2792
2793 static unsigned
2794 seq_cost (rtx seq, bool speed)
2795 {
2796   unsigned cost = 0;
2797   rtx set;
2798
2799   for (; seq; seq = NEXT_INSN (seq))
2800     {
2801       set = single_set (seq);
2802       if (set)
2803         cost += set_src_cost (SET_SRC (set), speed);
2804       else
2805         cost++;
2806     }
2807
2808   return cost;
2809 }
2810
2811 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2812 static rtx
2813 produce_memory_decl_rtl (tree obj, int *regno)
2814 {
2815   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2816   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2817   rtx x;
2818
2819   gcc_assert (obj);
2820   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2821     {
2822       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2823       x = gen_rtx_SYMBOL_REF (address_mode, name);
2824       SET_SYMBOL_REF_DECL (x, obj);
2825       x = gen_rtx_MEM (DECL_MODE (obj), x);
2826       set_mem_addr_space (x, as);
2827       targetm.encode_section_info (obj, x, true);
2828     }
2829   else
2830     {
2831       x = gen_raw_REG (address_mode, (*regno)++);
2832       x = gen_rtx_MEM (DECL_MODE (obj), x);
2833       set_mem_addr_space (x, as);
2834     }
2835
2836   return x;
2837 }
2838
2839 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2840    walk_tree.  DATA contains the actual fake register number.  */
2841
2842 static tree
2843 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2844 {
2845   tree obj = NULL_TREE;
2846   rtx x = NULL_RTX;
2847   int *regno = (int *) data;
2848
2849   switch (TREE_CODE (*expr_p))
2850     {
2851     case ADDR_EXPR:
2852       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2853            handled_component_p (*expr_p);
2854            expr_p = &TREE_OPERAND (*expr_p, 0))
2855         continue;
2856       obj = *expr_p;
2857       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2858         x = produce_memory_decl_rtl (obj, regno);
2859       break;
2860
2861     case SSA_NAME:
2862       *ws = 0;
2863       obj = SSA_NAME_VAR (*expr_p);
2864       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2865       if (!obj)
2866         return NULL_TREE;
2867       if (!DECL_RTL_SET_P (obj))
2868         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2869       break;
2870
2871     case VAR_DECL:
2872     case PARM_DECL:
2873     case RESULT_DECL:
2874       *ws = 0;
2875       obj = *expr_p;
2876
2877       if (DECL_RTL_SET_P (obj))
2878         break;
2879
2880       if (DECL_MODE (obj) == BLKmode)
2881         x = produce_memory_decl_rtl (obj, regno);
2882       else
2883         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2884
2885       break;
2886
2887     default:
2888       break;
2889     }
2890
2891   if (x)
2892     {
2893       decl_rtl_to_reset.safe_push (obj);
2894       SET_DECL_RTL (obj, x);
2895     }
2896
2897   return NULL_TREE;
2898 }
2899
2900 /* Determines cost of the computation of EXPR.  */
2901
2902 static unsigned
2903 computation_cost (tree expr, bool speed)
2904 {
2905   rtx seq, rslt;
2906   tree type = TREE_TYPE (expr);
2907   unsigned cost;
2908   /* Avoid using hard regs in ways which may be unsupported.  */
2909   int regno = LAST_VIRTUAL_REGISTER + 1;
2910   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2911   enum node_frequency real_frequency = node->frequency;
2912
2913   node->frequency = NODE_FREQUENCY_NORMAL;
2914   crtl->maybe_hot_insn_p = speed;
2915   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2916   start_sequence ();
2917   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2918   seq = get_insns ();
2919   end_sequence ();
2920   default_rtl_profile ();
2921   node->frequency = real_frequency;
2922
2923   cost = seq_cost (seq, speed);
2924   if (MEM_P (rslt))
2925     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2926                           TYPE_ADDR_SPACE (type), speed);
2927   else if (!REG_P (rslt))
2928     cost += set_src_cost (rslt, speed);
2929
2930   return cost;
2931 }
2932
2933 /* Returns variable containing the value of candidate CAND at statement AT.  */
2934
2935 static tree
2936 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2937 {
2938   if (stmt_after_increment (loop, cand, stmt))
2939     return cand->var_after;
2940   else
2941     return cand->var_before;
2942 }
2943
2944 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2945    same precision that is at least as wide as the precision of TYPE, stores
2946    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2947    type of A and B.  */
2948
2949 static tree
2950 determine_common_wider_type (tree *a, tree *b)
2951 {
2952   tree wider_type = NULL;
2953   tree suba, subb;
2954   tree atype = TREE_TYPE (*a);
2955
2956   if (CONVERT_EXPR_P (*a))
2957     {
2958       suba = TREE_OPERAND (*a, 0);
2959       wider_type = TREE_TYPE (suba);
2960       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2961         return atype;
2962     }
2963   else
2964     return atype;
2965
2966   if (CONVERT_EXPR_P (*b))
2967     {
2968       subb = TREE_OPERAND (*b, 0);
2969       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2970         return atype;
2971     }
2972   else
2973     return atype;
2974
2975   *a = suba;
2976   *b = subb;
2977   return wider_type;
2978 }
2979
2980 /* Determines the expression by that USE is expressed from induction variable
2981    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2982    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2983
2984 static bool
2985 get_computation_aff (struct loop *loop,
2986                      struct iv_use *use, struct iv_cand *cand, gimple at,
2987                      struct affine_tree_combination *aff)
2988 {
2989   tree ubase = use->iv->base;
2990   tree ustep = use->iv->step;
2991   tree cbase = cand->iv->base;
2992   tree cstep = cand->iv->step, cstep_common;
2993   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2994   tree common_type, var;
2995   tree uutype;
2996   aff_tree cbase_aff, var_aff;
2997   double_int rat;
2998
2999   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3000     {
3001       /* We do not have a precision to express the values of use.  */
3002       return false;
3003     }
3004
3005   var = var_at_stmt (loop, cand, at);
3006   uutype = unsigned_type_for (utype);
3007
3008   /* If the conversion is not noop, perform it.  */
3009   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3010     {
3011       cstep = fold_convert (uutype, cstep);
3012       cbase = fold_convert (uutype, cbase);
3013       var = fold_convert (uutype, var);
3014     }
3015
3016   if (!constant_multiple_of (ustep, cstep, &rat))
3017     return false;
3018
3019   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3020      type, we achieve better folding by computing their difference in this
3021      wider type, and cast the result to UUTYPE.  We do not need to worry about
3022      overflows, as all the arithmetics will in the end be performed in UUTYPE
3023      anyway.  */
3024   common_type = determine_common_wider_type (&ubase, &cbase);
3025
3026   /* use = ubase - ratio * cbase + ratio * var.  */
3027   tree_to_aff_combination (ubase, common_type, aff);
3028   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3029   tree_to_aff_combination (var, uutype, &var_aff);
3030
3031   /* We need to shift the value if we are after the increment.  */
3032   if (stmt_after_increment (loop, cand, at))
3033     {
3034       aff_tree cstep_aff;
3035
3036       if (common_type != uutype)
3037         cstep_common = fold_convert (common_type, cstep);
3038       else
3039         cstep_common = cstep;
3040
3041       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3042       aff_combination_add (&cbase_aff, &cstep_aff);
3043     }
3044
3045   aff_combination_scale (&cbase_aff, -rat);
3046   aff_combination_add (aff, &cbase_aff);
3047   if (common_type != uutype)
3048     aff_combination_convert (aff, uutype);
3049
3050   aff_combination_scale (&var_aff, rat);
3051   aff_combination_add (aff, &var_aff);
3052
3053   return true;
3054 }
3055
3056 /* Return the type of USE.  */
3057
3058 static tree
3059 get_use_type (struct iv_use *use)
3060 {
3061   tree base_type = TREE_TYPE (use->iv->base);
3062   tree type;
3063
3064   if (use->type == USE_ADDRESS)
3065     {
3066       /* The base_type may be a void pointer.  Create a pointer type based on
3067          the mem_ref instead.  */
3068       type = build_pointer_type (TREE_TYPE (*use->op_p));
3069       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3070                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3071     }
3072   else
3073     type = base_type;
3074
3075   return type;
3076 }
3077
3078 /* Determines the expression by that USE is expressed from induction variable
3079    CAND at statement AT in LOOP.  The computation is unshared.  */
3080
3081 static tree
3082 get_computation_at (struct loop *loop,
3083                     struct iv_use *use, struct iv_cand *cand, gimple at)
3084 {
3085   aff_tree aff;
3086   tree type = get_use_type (use);
3087
3088   if (!get_computation_aff (loop, use, cand, at, &aff))
3089     return NULL_TREE;
3090   unshare_aff_combination (&aff);
3091   return fold_convert (type, aff_combination_to_tree (&aff));
3092 }
3093
3094 /* Determines the expression by that USE is expressed from induction variable
3095    CAND in LOOP.  The computation is unshared.  */
3096
3097 static tree
3098 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3099 {
3100   return get_computation_at (loop, use, cand, use->stmt);
3101 }
3102
3103 /* Adjust the cost COST for being in loop setup rather than loop body.
3104    If we're optimizing for space, the loop setup overhead is constant;
3105    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3106 static unsigned
3107 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3108 {
3109   if (cost == INFTY)
3110     return cost;
3111   else if (optimize_loop_for_speed_p (data->current_loop))
3112     return cost / avg_loop_niter (data->current_loop);
3113   else
3114     return cost;
3115 }
3116
3117 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3118    validity for a memory reference accessing memory of mode MODE in
3119    address space AS.  */
3120
3121
3122 bool
3123 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3124                                  addr_space_t as)
3125 {
3126 #define MAX_RATIO 128
3127   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3128   static vec<sbitmap> valid_mult_list;
3129   sbitmap valid_mult;
3130
3131   if (data_index >= valid_mult_list.length ())
3132     valid_mult_list.safe_grow_cleared (data_index + 1);
3133
3134   valid_mult = valid_mult_list[data_index];
3135   if (!valid_mult)
3136     {
3137       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3138       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3139       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3140       rtx addr, scaled;
3141       HOST_WIDE_INT i;
3142
3143       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3144       bitmap_clear (valid_mult);
3145       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3146       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3147       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3148         {
3149           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3150           if (memory_address_addr_space_p (mode, addr, as)
3151               || memory_address_addr_space_p (mode, scaled, as))
3152             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3153         }
3154
3155       if (dump_file && (dump_flags & TDF_DETAILS))
3156         {
3157           fprintf (dump_file, "  allowed multipliers:");
3158           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3159             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3160               fprintf (dump_file, " %d", (int) i);
3161           fprintf (dump_file, "\n");
3162           fprintf (dump_file, "\n");
3163         }
3164
3165       valid_mult_list[data_index] = valid_mult;
3166     }
3167
3168   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3169     return false;
3170
3171   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3172 }
3173
3174 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3175    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3176    variable is omitted.  Compute the cost for a memory reference that accesses
3177    a memory location of mode MEM_MODE in address space AS.
3178
3179    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3180    size of MEM_MODE / RATIO) is available.  To make this determination, we
3181    look at the size of the increment to be made, which is given in CSTEP.
3182    CSTEP may be zero if the step is unknown.
3183    STMT_AFTER_INC is true iff the statement we're looking at is after the
3184    increment of the original biv.
3185
3186    TODO -- there must be some better way.  This all is quite crude.  */
3187
3188 typedef struct address_cost_data_s
3189 {
3190   HOST_WIDE_INT min_offset, max_offset;
3191   unsigned costs[2][2][2][2];
3192 } *address_cost_data;
3193
3194
3195 static comp_cost
3196 get_address_cost (bool symbol_present, bool var_present,
3197                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3198                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3199                   addr_space_t as, bool speed,
3200                   bool stmt_after_inc, bool *may_autoinc)
3201 {
3202   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3203   static vec<address_cost_data> address_cost_data_list;
3204   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3205   address_cost_data data;
3206   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3207   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3208   unsigned cost, acost, complexity;
3209   bool offset_p, ratio_p, autoinc;
3210   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3211   unsigned HOST_WIDE_INT mask;
3212   unsigned bits;
3213
3214   if (data_index >= address_cost_data_list.length ())
3215     address_cost_data_list.safe_grow_cleared (data_index + 1);
3216
3217   data = address_cost_data_list[data_index];
3218   if (!data)
3219     {
3220       HOST_WIDE_INT i;
3221       HOST_WIDE_INT rat, off = 0;
3222       int old_cse_not_expected, width;
3223       unsigned sym_p, var_p, off_p, rat_p, add_c;
3224       rtx seq, addr, base;
3225       rtx reg0, reg1;
3226
3227       data = (address_cost_data) xcalloc (1, sizeof (*data));
3228
3229       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3230
3231       width = GET_MODE_BITSIZE (address_mode) - 1;
3232       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3233         width = HOST_BITS_PER_WIDE_INT - 1;
3234       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3235
3236       for (i = width; i >= 0; i--)
3237         {
3238           off = -((unsigned HOST_WIDE_INT) 1 << i);
3239           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3240           if (memory_address_addr_space_p (mem_mode, addr, as))
3241             break;
3242         }
3243       data->min_offset = (i == -1? 0 : off);
3244
3245       for (i = width; i >= 0; i--)
3246         {
3247           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3248           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3249           if (memory_address_addr_space_p (mem_mode, addr, as))
3250             break;
3251         }
3252       if (i == -1)
3253         off = 0;
3254       data->max_offset = off;
3255
3256       if (dump_file && (dump_flags & TDF_DETAILS))
3257         {
3258           fprintf (dump_file, "get_address_cost:\n");
3259           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3260                    GET_MODE_NAME (mem_mode),
3261                    data->min_offset);
3262           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3263                    GET_MODE_NAME (mem_mode),
3264                    data->max_offset);
3265         }
3266
3267       rat = 1;
3268       for (i = 2; i <= MAX_RATIO; i++)
3269         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3270           {
3271             rat = i;
3272             break;
3273           }
3274
3275       /* Compute the cost of various addressing modes.  */
3276       acost = 0;
3277       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3278       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3279
3280       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3281           || USE_STORE_PRE_DECREMENT (mem_mode))
3282         {
3283           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3284           has_predec[mem_mode]
3285             = memory_address_addr_space_p (mem_mode, addr, as);
3286         }
3287       if (USE_LOAD_POST_DECREMENT (mem_mode)
3288           || USE_STORE_POST_DECREMENT (mem_mode))
3289         {
3290           addr = gen_rtx_POST_DEC (address_mode, reg0);
3291           has_postdec[mem_mode]
3292             = memory_address_addr_space_p (mem_mode, addr, as);
3293         }
3294       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3295           || USE_STORE_PRE_DECREMENT (mem_mode))
3296         {
3297           addr = gen_rtx_PRE_INC (address_mode, reg0);
3298           has_preinc[mem_mode]
3299             = memory_address_addr_space_p (mem_mode, addr, as);
3300         }
3301       if (USE_LOAD_POST_INCREMENT (mem_mode)
3302           || USE_STORE_POST_INCREMENT (mem_mode))
3303         {
3304           addr = gen_rtx_POST_INC (address_mode, reg0);
3305           has_postinc[mem_mode]
3306             = memory_address_addr_space_p (mem_mode, addr, as);
3307         }
3308       for (i = 0; i < 16; i++)
3309         {
3310           sym_p = i & 1;
3311           var_p = (i >> 1) & 1;
3312           off_p = (i >> 2) & 1;
3313           rat_p = (i >> 3) & 1;
3314
3315           addr = reg0;
3316           if (rat_p)
3317             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3318                                    gen_int_mode (rat, address_mode));
3319
3320           if (var_p)
3321             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3322
3323           if (sym_p)
3324             {
3325               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3326               /* ??? We can run into trouble with some backends by presenting
3327                  it with symbols which haven't been properly passed through
3328                  targetm.encode_section_info.  By setting the local bit, we
3329                  enhance the probability of things working.  */
3330               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3331
3332               if (off_p)
3333                 base = gen_rtx_fmt_e (CONST, address_mode,
3334                                       gen_rtx_fmt_ee
3335                                         (PLUS, address_mode, base,
3336                                          gen_int_mode (off, address_mode)));
3337             }
3338           else if (off_p)
3339             base = gen_int_mode (off, address_mode);
3340           else
3341             base = NULL_RTX;
3342
3343           if (base)
3344             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3345
3346           start_sequence ();
3347           /* To avoid splitting addressing modes, pretend that no cse will
3348              follow.  */
3349           old_cse_not_expected = cse_not_expected;
3350           cse_not_expected = true;
3351           addr = memory_address_addr_space (mem_mode, addr, as);
3352           cse_not_expected = old_cse_not_expected;
3353           seq = get_insns ();
3354           end_sequence ();
3355
3356           acost = seq_cost (seq, speed);
3357           acost += address_cost (addr, mem_mode, as, speed);
3358
3359           if (!acost)
3360             acost = 1;
3361           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3362         }
3363
3364       /* On some targets, it is quite expensive to load symbol to a register,
3365          which makes addresses that contain symbols look much more expensive.
3366          However, the symbol will have to be loaded in any case before the
3367          loop (and quite likely we have it in register already), so it does not
3368          make much sense to penalize them too heavily.  So make some final
3369          tweaks for the SYMBOL_PRESENT modes:
3370
3371          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3372          var is cheaper, use this mode with small penalty.
3373          If VAR_PRESENT is true, try whether the mode with
3374          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3375          if this is the case, use it.  */
3376       add_c = add_cost (speed, address_mode);
3377       for (i = 0; i < 8; i++)
3378         {
3379           var_p = i & 1;
3380           off_p = (i >> 1) & 1;
3381           rat_p = (i >> 2) & 1;
3382
3383           acost = data->costs[0][1][off_p][rat_p] + 1;
3384           if (var_p)
3385             acost += add_c;
3386
3387           if (acost < data->costs[1][var_p][off_p][rat_p])
3388             data->costs[1][var_p][off_p][rat_p] = acost;
3389         }
3390
3391       if (dump_file && (dump_flags & TDF_DETAILS))
3392         {
3393           fprintf (dump_file, "Address costs:\n");
3394
3395           for (i = 0; i < 16; i++)
3396             {
3397               sym_p = i & 1;
3398               var_p = (i >> 1) & 1;
3399               off_p = (i >> 2) & 1;
3400               rat_p = (i >> 3) & 1;
3401
3402               fprintf (dump_file, "  ");
3403               if (sym_p)
3404                 fprintf (dump_file, "sym + ");
3405               if (var_p)
3406                 fprintf (dump_file, "var + ");
3407               if (off_p)
3408                 fprintf (dump_file, "cst + ");
3409               if (rat_p)
3410                 fprintf (dump_file, "rat * ");
3411
3412               acost = data->costs[sym_p][var_p][off_p][rat_p];
3413               fprintf (dump_file, "index costs %d\n", acost);
3414             }
3415           if (has_predec[mem_mode] || has_postdec[mem_mode]
3416               || has_preinc[mem_mode] || has_postinc[mem_mode])
3417             fprintf (dump_file, "  May include autoinc/dec\n");
3418           fprintf (dump_file, "\n");
3419         }
3420
3421       address_cost_data_list[data_index] = data;
3422     }
3423
3424   bits = GET_MODE_BITSIZE (address_mode);
3425   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3426   offset &= mask;
3427   if ((offset >> (bits - 1) & 1))
3428     offset |= ~mask;
3429   s_offset = offset;
3430
3431   autoinc = false;
3432   msize = GET_MODE_SIZE (mem_mode);
3433   autoinc_offset = offset;
3434   if (stmt_after_inc)
3435     autoinc_offset += ratio * cstep;
3436   if (symbol_present || var_present || ratio != 1)
3437     autoinc = false;
3438   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3439                && msize == cstep)
3440            || (has_postdec[mem_mode] && autoinc_offset == 0
3441                && msize == -cstep)
3442            || (has_preinc[mem_mode] && autoinc_offset == msize
3443                && msize == cstep)
3444            || (has_predec[mem_mode] && autoinc_offset == -msize
3445                && msize == -cstep))
3446     autoinc = true;
3447
3448   cost = 0;
3449   offset_p = (s_offset != 0
3450               && data->min_offset <= s_offset
3451               && s_offset <= data->max_offset);
3452   ratio_p = (ratio != 1
3453              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3454
3455   if (ratio != 1 && !ratio_p)
3456     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3457
3458   if (s_offset && !offset_p && !symbol_present)
3459     cost += add_cost (speed, address_mode);
3460
3461   if (may_autoinc)
3462     *may_autoinc = autoinc;
3463   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3464   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3465   return new_cost (cost + acost, complexity);
3466 }
3467
3468  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3469     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3470     calculating the operands of EXPR.  Returns true if successful, and returns
3471     the cost in COST.  */
3472
3473 static bool
3474 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3475                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3476 {
3477   comp_cost res;
3478   tree op1 = TREE_OPERAND (expr, 1);
3479   tree cst = TREE_OPERAND (mult, 1);
3480   tree multop = TREE_OPERAND (mult, 0);
3481   int m = exact_log2 (int_cst_value (cst));
3482   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3483   int sa_cost;
3484
3485   if (!(m >= 0 && m < maxm))
3486     return false;
3487
3488   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3489              ? shiftadd_cost (speed, mode, m)
3490              : (mult == op1
3491                 ? shiftsub1_cost (speed, mode, m)
3492                 : shiftsub0_cost (speed, mode, m)));
3493   res = new_cost (sa_cost, 0);
3494   res = add_costs (res, mult == op1 ? cost0 : cost1);
3495
3496   STRIP_NOPS (multop);
3497   if (!is_gimple_val (multop))
3498     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3499
3500   *cost = res;
3501   return true;
3502 }
3503
3504 /* Estimates cost of forcing expression EXPR into a variable.  */
3505
3506 static comp_cost
3507 force_expr_to_var_cost (tree expr, bool speed)
3508 {
3509   static bool costs_initialized = false;
3510   static unsigned integer_cost [2];
3511   static unsigned symbol_cost [2];
3512   static unsigned address_cost [2];
3513   tree op0, op1;
3514   comp_cost cost0, cost1, cost;
3515   enum machine_mode mode;
3516
3517   if (!costs_initialized)
3518     {
3519       tree type = build_pointer_type (integer_type_node);
3520       tree var, addr;
3521       rtx x;
3522       int i;
3523
3524       var = create_tmp_var_raw (integer_type_node, "test_var");
3525       TREE_STATIC (var) = 1;
3526       x = produce_memory_decl_rtl (var, NULL);
3527       SET_DECL_RTL (var, x);
3528
3529       addr = build1 (ADDR_EXPR, type, var);
3530
3531
3532       for (i = 0; i < 2; i++)
3533         {
3534           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3535                                                              2000), i);
3536
3537           symbol_cost[i] = computation_cost (addr, i) + 1;
3538
3539           address_cost[i]
3540             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3541           if (dump_file && (dump_flags & TDF_DETAILS))
3542             {
3543               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3544               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3545               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3546               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3547               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3548               fprintf (dump_file, "\n");
3549             }
3550         }
3551
3552       costs_initialized = true;
3553     }
3554
3555   STRIP_NOPS (expr);
3556
3557   if (SSA_VAR_P (expr))
3558     return no_cost;
3559
3560   if (is_gimple_min_invariant (expr))
3561     {
3562       if (TREE_CODE (expr) == INTEGER_CST)
3563         return new_cost (integer_cost [speed], 0);
3564
3565       if (TREE_CODE (expr) == ADDR_EXPR)
3566         {
3567           tree obj = TREE_OPERAND (expr, 0);
3568
3569           if (TREE_CODE (obj) == VAR_DECL
3570               || TREE_CODE (obj) == PARM_DECL
3571               || TREE_CODE (obj) == RESULT_DECL)
3572             return new_cost (symbol_cost [speed], 0);
3573         }
3574
3575       return new_cost (address_cost [speed], 0);
3576     }
3577
3578   switch (TREE_CODE (expr))
3579     {
3580     case POINTER_PLUS_EXPR:
3581     case PLUS_EXPR:
3582     case MINUS_EXPR:
3583     case MULT_EXPR:
3584       op0 = TREE_OPERAND (expr, 0);
3585       op1 = TREE_OPERAND (expr, 1);
3586       STRIP_NOPS (op0);
3587       STRIP_NOPS (op1);
3588
3589       if (is_gimple_val (op0))
3590         cost0 = no_cost;
3591       else
3592         cost0 = force_expr_to_var_cost (op0, speed);
3593
3594       if (is_gimple_val (op1))
3595         cost1 = no_cost;
3596       else
3597         cost1 = force_expr_to_var_cost (op1, speed);
3598
3599       break;
3600
3601     case NEGATE_EXPR:
3602       op0 = TREE_OPERAND (expr, 0);
3603       STRIP_NOPS (op0);
3604       op1 = NULL_TREE;
3605
3606       if (is_gimple_val (op0))
3607         cost0 = no_cost;
3608       else
3609         cost0 = force_expr_to_var_cost (op0, speed);
3610
3611       cost1 = no_cost;
3612       break;
3613
3614     default:
3615       /* Just an arbitrary value, FIXME.  */
3616       return new_cost (target_spill_cost[speed], 0);
3617     }
3618
3619   mode = TYPE_MODE (TREE_TYPE (expr));
3620   switch (TREE_CODE (expr))
3621     {
3622     case POINTER_PLUS_EXPR:
3623     case PLUS_EXPR:
3624     case MINUS_EXPR:
3625     case NEGATE_EXPR:
3626       cost = new_cost (add_cost (speed, mode), 0);
3627       if (TREE_CODE (expr) != NEGATE_EXPR)
3628         {
3629           tree mult = NULL_TREE;
3630           comp_cost sa_cost;
3631           if (TREE_CODE (op1) == MULT_EXPR)
3632             mult = op1;
3633           else if (TREE_CODE (op0) == MULT_EXPR)
3634             mult = op0;
3635
3636           if (mult != NULL_TREE
3637               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3638               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3639                                     speed, &sa_cost))
3640             return sa_cost;
3641         }
3642       break;
3643
3644     case MULT_EXPR:
3645       if (cst_and_fits_in_hwi (op0))
3646         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3647                                              mode, speed), 0);
3648       else if (cst_and_fits_in_hwi (op1))
3649         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3650                                              mode, speed), 0);
3651       else
3652         return new_cost (target_spill_cost [speed], 0);
3653       break;
3654
3655     default:
3656       gcc_unreachable ();
3657     }
3658
3659   cost = add_costs (cost, cost0);
3660   cost = add_costs (cost, cost1);
3661
3662   /* Bound the cost by target_spill_cost.  The parts of complicated
3663      computations often are either loop invariant or at least can
3664      be shared between several iv uses, so letting this grow without
3665      limits would not give reasonable results.  */
3666   if (cost.cost > (int) target_spill_cost [speed])
3667     cost.cost = target_spill_cost [speed];
3668
3669   return cost;
3670 }
3671
3672 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3673    invariants the computation depends on.  */
3674
3675 static comp_cost
3676 force_var_cost (struct ivopts_data *data,
3677                 tree expr, bitmap *depends_on)
3678 {
3679   if (depends_on)
3680     {
3681       fd_ivopts_data = data;
3682       walk_tree (&expr, find_depends, depends_on, NULL);
3683     }
3684
3685   return force_expr_to_var_cost (expr, data->speed);
3686 }
3687
3688 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3689    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3690    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3691    invariants the computation depends on.  */
3692
3693 static comp_cost
3694 split_address_cost (struct ivopts_data *data,
3695                     tree addr, bool *symbol_present, bool *var_present,
3696                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3697 {
3698   tree core;
3699   HOST_WIDE_INT bitsize;
3700   HOST_WIDE_INT bitpos;
3701   tree toffset;
3702   enum machine_mode mode;
3703   int unsignedp, volatilep;
3704
3705   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3706                               &unsignedp, &volatilep, false);
3707
3708   if (toffset != 0
3709       || bitpos % BITS_PER_UNIT != 0
3710       || TREE_CODE (core) != VAR_DECL)
3711     {
3712       *symbol_present = false;
3713       *var_present = true;
3714       fd_ivopts_data = data;
3715       walk_tree (&addr, find_depends, depends_on, NULL);
3716       return new_cost (target_spill_cost[data->speed], 0);
3717     }
3718
3719   *offset += bitpos / BITS_PER_UNIT;
3720   if (TREE_STATIC (core)
3721       || DECL_EXTERNAL (core))
3722     {
3723       *symbol_present = true;
3724       *var_present = false;
3725       return no_cost;
3726     }
3727
3728   *symbol_present = false;
3729   *var_present = true;
3730   return no_cost;
3731 }
3732
3733 /* Estimates cost of expressing difference of addresses E1 - E2 as
3734    var + symbol + offset.  The value of offset is added to OFFSET,
3735    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3736    part is missing.  DEPENDS_ON is a set of the invariants the computation
3737    depends on.  */
3738
3739 static comp_cost
3740 ptr_difference_cost (struct ivopts_data *data,
3741                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3742                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3743 {
3744   HOST_WIDE_INT diff = 0;
3745   aff_tree aff_e1, aff_e2;
3746   tree type;
3747
3748   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3749
3750   if (ptr_difference_const (e1, e2, &diff))
3751     {
3752       *offset += diff;
3753       *symbol_present = false;
3754       *var_present = false;
3755       return no_cost;
3756     }
3757
3758   if (integer_zerop (e2))
3759     return split_address_cost (data, TREE_OPERAND (e1, 0),
3760                                symbol_present, var_present, offset, depends_on);
3761
3762   *symbol_present = false;
3763   *var_present = true;
3764
3765   type = signed_type_for (TREE_TYPE (e1));
3766   tree_to_aff_combination (e1, type, &aff_e1);
3767   tree_to_aff_combination (e2, type, &aff_e2);
3768   aff_combination_scale (&aff_e2, double_int_minus_one);
3769   aff_combination_add (&aff_e1, &aff_e2);
3770
3771   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3772 }
3773
3774 /* Estimates cost of expressing difference E1 - E2 as
3775    var + symbol + offset.  The value of offset is added to OFFSET,
3776    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3777    part is missing.  DEPENDS_ON is a set of the invariants the computation
3778    depends on.  */
3779
3780 static comp_cost
3781 difference_cost (struct ivopts_data *data,
3782                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3783                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3784 {
3785   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3786   unsigned HOST_WIDE_INT off1, off2;
3787   aff_tree aff_e1, aff_e2;
3788   tree type;
3789
3790   e1 = strip_offset (e1, &off1);
3791   e2 = strip_offset (e2, &off2);
3792   *offset += off1 - off2;
3793
3794   STRIP_NOPS (e1);
3795   STRIP_NOPS (e2);
3796
3797   if (TREE_CODE (e1) == ADDR_EXPR)
3798     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3799                                 offset, depends_on);
3800   *symbol_present = false;
3801
3802   if (operand_equal_p (e1, e2, 0))
3803     {
3804       *var_present = false;
3805       return no_cost;
3806     }
3807
3808   *var_present = true;
3809
3810   if (integer_zerop (e2))
3811     return force_var_cost (data, e1, depends_on);
3812
3813   if (integer_zerop (e1))
3814     {
3815       comp_cost cost = force_var_cost (data, e2, depends_on);
3816       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3817       return cost;
3818     }
3819
3820   type = signed_type_for (TREE_TYPE (e1));
3821   tree_to_aff_combination (e1, type, &aff_e1);
3822   tree_to_aff_combination (e2, type, &aff_e2);
3823   aff_combination_scale (&aff_e2, double_int_minus_one);
3824   aff_combination_add (&aff_e1, &aff_e2);
3825
3826   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3827 }
3828
3829 /* Returns true if AFF1 and AFF2 are identical.  */
3830
3831 static bool
3832 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3833 {
3834   unsigned i;
3835
3836   if (aff1->n != aff2->n)
3837     return false;
3838
3839   for (i = 0; i < aff1->n; i++)
3840     {
3841       if (aff1->elts[i].coef != aff2->elts[i].coef)
3842         return false;
3843
3844       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3845         return false;
3846     }
3847   return true;
3848 }
3849
3850 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3851
3852 static int
3853 get_expr_id (struct ivopts_data *data, tree expr)
3854 {
3855   struct iv_inv_expr_ent ent;
3856   struct iv_inv_expr_ent **slot;
3857
3858   ent.expr = expr;
3859   ent.hash = iterative_hash_expr (expr, 0);
3860   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3861   if (*slot)
3862     return (*slot)->id;
3863
3864   *slot = XNEW (struct iv_inv_expr_ent);
3865   (*slot)->expr = expr;
3866   (*slot)->hash = ent.hash;
3867   (*slot)->id = data->inv_expr_id++;
3868   return (*slot)->id;
3869 }
3870
3871 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3872    requires a new compiler generated temporary.  Returns -1 otherwise.
3873    ADDRESS_P is a flag indicating if the expression is for address
3874    computation.  */
3875
3876 static int
3877 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3878                             tree cbase, HOST_WIDE_INT ratio,
3879                             bool address_p)
3880 {
3881   aff_tree ubase_aff, cbase_aff;
3882   tree expr, ub, cb;
3883
3884   STRIP_NOPS (ubase);
3885   STRIP_NOPS (cbase);
3886   ub = ubase;
3887   cb = cbase;
3888
3889   if ((TREE_CODE (ubase) == INTEGER_CST)
3890       && (TREE_CODE (cbase) == INTEGER_CST))
3891     return -1;
3892
3893   /* Strips the constant part. */
3894   if (TREE_CODE (ubase) == PLUS_EXPR
3895       || TREE_CODE (ubase) == MINUS_EXPR
3896       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3897     {
3898       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3899         ubase = TREE_OPERAND (ubase, 0);
3900     }
3901
3902   /* Strips the constant part. */
3903   if (TREE_CODE (cbase) == PLUS_EXPR
3904       || TREE_CODE (cbase) == MINUS_EXPR
3905       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3906     {
3907       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3908         cbase = TREE_OPERAND (cbase, 0);
3909     }
3910
3911   if (address_p)
3912     {
3913       if (((TREE_CODE (ubase) == SSA_NAME)
3914            || (TREE_CODE (ubase) == ADDR_EXPR
3915                && is_gimple_min_invariant (ubase)))
3916           && (TREE_CODE (cbase) == INTEGER_CST))
3917         return -1;
3918
3919       if (((TREE_CODE (cbase) == SSA_NAME)
3920            || (TREE_CODE (cbase) == ADDR_EXPR
3921                && is_gimple_min_invariant (cbase)))
3922           && (TREE_CODE (ubase) == INTEGER_CST))
3923         return -1;
3924     }
3925
3926   if (ratio == 1)
3927     {
3928       if (operand_equal_p (ubase, cbase, 0))
3929         return -1;
3930
3931       if (TREE_CODE (ubase) == ADDR_EXPR
3932           && TREE_CODE (cbase) == ADDR_EXPR)
3933         {
3934           tree usym, csym;
3935
3936           usym = TREE_OPERAND (ubase, 0);
3937           csym = TREE_OPERAND (cbase, 0);
3938           if (TREE_CODE (usym) == ARRAY_REF)
3939             {
3940               tree ind = TREE_OPERAND (usym, 1);
3941               if (TREE_CODE (ind) == INTEGER_CST
3942                   && host_integerp (ind, 0)
3943                   && TREE_INT_CST_LOW (ind) == 0)
3944                 usym = TREE_OPERAND (usym, 0);
3945             }
3946           if (TREE_CODE (csym) == ARRAY_REF)
3947             {
3948               tree ind = TREE_OPERAND (csym, 1);
3949               if (TREE_CODE (ind) == INTEGER_CST
3950                   && host_integerp (ind, 0)
3951                   && TREE_INT_CST_LOW (ind) == 0)
3952                 csym = TREE_OPERAND (csym, 0);
3953             }
3954           if (operand_equal_p (usym, csym, 0))
3955             return -1;
3956         }
3957       /* Now do more complex comparison  */
3958       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3959       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3960       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3961         return -1;
3962     }
3963
3964   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3965   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3966
3967   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3968   aff_combination_add (&ubase_aff, &cbase_aff);
3969   expr = aff_combination_to_tree (&ubase_aff);
3970   return get_expr_id (data, expr);
3971 }
3972
3973
3974
3975 /* Determines the cost of the computation by that USE is expressed
3976    from induction variable CAND.  If ADDRESS_P is true, we just need
3977    to create an address from it, otherwise we want to get it into
3978    register.  A set of invariants we depend on is stored in
3979    DEPENDS_ON.  AT is the statement at that the value is computed.
3980    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3981    addressing is likely.  */
3982
3983 static comp_cost
3984 get_computation_cost_at (struct ivopts_data *data,
3985                          struct iv_use *use, struct iv_cand *cand,
3986                          bool address_p, bitmap *depends_on, gimple at,
3987                          bool *can_autoinc,
3988                          int *inv_expr_id)
3989 {
3990   tree ubase = use->iv->base, ustep = use->iv->step;
3991   tree cbase, cstep;
3992   tree utype = TREE_TYPE (ubase), ctype;
3993   unsigned HOST_WIDE_INT cstepi, offset = 0;
3994   HOST_WIDE_INT ratio, aratio;
3995   bool var_present, symbol_present, stmt_is_after_inc;
3996   comp_cost cost;
3997   double_int rat;
3998   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3999   enum machine_mode mem_mode = (address_p
4000                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4001                                 : VOIDmode);
4002
4003   *depends_on = NULL;
4004
4005   /* Only consider real candidates.  */
4006   if (!cand->iv)
4007     return infinite_cost;
4008
4009   cbase = cand->iv->base;
4010   cstep = cand->iv->step;
4011   ctype = TREE_TYPE (cbase);
4012
4013   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4014     {
4015       /* We do not have a precision to express the values of use.  */
4016       return infinite_cost;
4017     }
4018
4019   if (address_p
4020       || (use->iv->base_object
4021           && cand->iv->base_object
4022           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4023           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4024     {
4025       /* Do not try to express address of an object with computation based
4026          on address of a different object.  This may cause problems in rtl
4027          level alias analysis (that does not expect this to be happening,
4028          as this is illegal in C), and would be unlikely to be useful
4029          anyway.  */
4030       if (use->iv->base_object
4031           && cand->iv->base_object
4032           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4033         return infinite_cost;
4034     }
4035
4036   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4037     {
4038       /* TODO -- add direct handling of this case.  */
4039       goto fallback;
4040     }
4041
4042   /* CSTEPI is removed from the offset in case statement is after the
4043      increment.  If the step is not constant, we use zero instead.
4044      This is a bit imprecise (there is the extra addition), but
4045      redundancy elimination is likely to transform the code so that
4046      it uses value of the variable before increment anyway,
4047      so it is not that much unrealistic.  */
4048   if (cst_and_fits_in_hwi (cstep))
4049     cstepi = int_cst_value (cstep);
4050   else
4051     cstepi = 0;
4052
4053   if (!constant_multiple_of (ustep, cstep, &rat))
4054     return infinite_cost;
4055
4056   if (rat.fits_shwi ())
4057     ratio = rat.to_shwi ();
4058   else
4059     return infinite_cost;
4060
4061   STRIP_NOPS (cbase);
4062   ctype = TREE_TYPE (cbase);
4063
4064   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4065
4066   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4067      or ratio == 1, it is better to handle this like
4068
4069      ubase - ratio * cbase + ratio * var
4070
4071      (also holds in the case ratio == -1, TODO.  */
4072
4073   if (cst_and_fits_in_hwi (cbase))
4074     {
4075       offset = - ratio * int_cst_value (cbase);
4076       cost = difference_cost (data,
4077                               ubase, build_int_cst (utype, 0),
4078                               &symbol_present, &var_present, &offset,
4079                               depends_on);
4080       cost.cost /= avg_loop_niter (data->current_loop);
4081     }
4082   else if (ratio == 1)
4083     {
4084       tree real_cbase = cbase;
4085
4086       /* Check to see if any adjustment is needed.  */
4087       if (cstepi == 0 && stmt_is_after_inc)
4088         {
4089           aff_tree real_cbase_aff;
4090           aff_tree cstep_aff;
4091
4092           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4093                                    &real_cbase_aff);
4094           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4095
4096           aff_combination_add (&real_cbase_aff, &cstep_aff);
4097           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4098         }
4099
4100       cost = difference_cost (data,
4101                               ubase, real_cbase,
4102                               &symbol_present, &var_present, &offset,
4103                               depends_on);
4104       cost.cost /= avg_loop_niter (data->current_loop);
4105     }
4106   else if (address_p
4107            && !POINTER_TYPE_P (ctype)
4108            && multiplier_allowed_in_address_p
4109                 (ratio, mem_mode,
4110                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4111     {
4112       cbase
4113         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4114       cost = difference_cost (data,
4115                               ubase, cbase,
4116                               &symbol_present, &var_present, &offset,
4117                               depends_on);
4118       cost.cost /= avg_loop_niter (data->current_loop);
4119     }
4120   else
4121     {
4122       cost = force_var_cost (data, cbase, depends_on);
4123       cost = add_costs (cost,
4124                         difference_cost (data,
4125                                          ubase, build_int_cst (utype, 0),
4126                                          &symbol_present, &var_present,
4127                                          &offset, depends_on));
4128       cost.cost /= avg_loop_niter (data->current_loop);
4129       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4130     }
4131
4132   if (inv_expr_id)
4133     {
4134       *inv_expr_id =
4135           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4136       /* Clear depends on.  */
4137       if (*inv_expr_id != -1 && depends_on && *depends_on)
4138         bitmap_clear (*depends_on);
4139     }
4140
4141   /* If we are after the increment, the value of the candidate is higher by
4142      one iteration.  */
4143   if (stmt_is_after_inc)
4144     offset -= ratio * cstepi;
4145
4146   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4147      (symbol/var1/const parts may be omitted).  If we are looking for an
4148      address, find the cost of addressing this.  */
4149   if (address_p)
4150     return add_costs (cost,
4151                       get_address_cost (symbol_present, var_present,
4152                                         offset, ratio, cstepi,
4153                                         mem_mode,
4154                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4155                                         speed, stmt_is_after_inc,
4156                                         can_autoinc));
4157
4158   /* Otherwise estimate the costs for computing the expression.  */
4159   if (!symbol_present && !var_present && !offset)
4160     {
4161       if (ratio != 1)
4162         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4163       return cost;
4164     }
4165
4166   /* Symbol + offset should be compile-time computable so consider that they
4167       are added once to the variable, if present.  */
4168   if (var_present && (symbol_present || offset))
4169     cost.cost += adjust_setup_cost (data,
4170                                     add_cost (speed, TYPE_MODE (ctype)));
4171
4172   /* Having offset does not affect runtime cost in case it is added to
4173      symbol, but it increases complexity.  */
4174   if (offset)
4175     cost.complexity++;
4176
4177   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4178
4179   aratio = ratio > 0 ? ratio : -ratio;
4180   if (aratio != 1)
4181     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4182   return cost;
4183
4184 fallback:
4185   if (can_autoinc)
4186     *can_autoinc = false;
4187
4188   {
4189     /* Just get the expression, expand it and measure the cost.  */
4190     tree comp = get_computation_at (data->current_loop, use, cand, at);
4191
4192     if (!comp)
4193       return infinite_cost;
4194
4195     if (address_p)
4196       comp = build_simple_mem_ref (comp);
4197
4198     return new_cost (computation_cost (comp, speed), 0);
4199   }
4200 }
4201
4202 /* Determines the cost of the computation by that USE is expressed
4203    from induction variable CAND.  If ADDRESS_P is true, we just need
4204    to create an address from it, otherwise we want to get it into
4205    register.  A set of invariants we depend on is stored in
4206    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4207    autoinc addressing is likely.  */
4208
4209 static comp_cost
4210 get_computation_cost (struct ivopts_data *data,
4211                       struct iv_use *use, struct iv_cand *cand,
4212                       bool address_p, bitmap *depends_on,
4213                       bool *can_autoinc, int *inv_expr_id)
4214 {
4215   return get_computation_cost_at (data,
4216                                   use, cand, address_p, depends_on, use->stmt,
4217                                   can_autoinc, inv_expr_id);
4218 }
4219
4220 /* Determines cost of basing replacement of USE on CAND in a generic
4221    expression.  */
4222
4223 static bool
4224 determine_use_iv_cost_generic (struct ivopts_data *data,
4225                                struct iv_use *use, struct iv_cand *cand)
4226 {
4227   bitmap depends_on;
4228   comp_cost cost;
4229   int inv_expr_id = -1;
4230
4231   /* The simple case first -- if we need to express value of the preserved
4232      original biv, the cost is 0.  This also prevents us from counting the
4233      cost of increment twice -- once at this use and once in the cost of
4234      the candidate.  */
4235   if (cand->pos == IP_ORIGINAL
4236       && cand->incremented_at == use->stmt)
4237     {
4238       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4239                        ERROR_MARK, -1);
4240       return true;
4241     }
4242
4243   cost = get_computation_cost (data, use, cand, false, &depends_on,
4244                                NULL, &inv_expr_id);
4245
4246   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4247                    inv_expr_id);
4248
4249   return !infinite_cost_p (cost);
4250 }
4251
4252 /* Determines cost of basing replacement of USE on CAND in an address.  */
4253
4254 static bool
4255 determine_use_iv_cost_address (struct ivopts_data *data,
4256                                struct iv_use *use, struct iv_cand *cand)
4257 {
4258   bitmap depends_on;
4259   bool can_autoinc;
4260   int inv_expr_id = -1;
4261   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4262                                          &can_autoinc, &inv_expr_id);
4263
4264   if (cand->ainc_use == use)
4265     {
4266       if (can_autoinc)
4267         cost.cost -= cand->cost_step;
4268       /* If we generated the candidate solely for exploiting autoincrement
4269          opportunities, and it turns out it can't be used, set the cost to
4270          infinity to make sure we ignore it.  */
4271       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4272         cost = infinite_cost;
4273     }
4274   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4275                    inv_expr_id);
4276
4277   return !infinite_cost_p (cost);
4278 }
4279
4280 /* Computes value of candidate CAND at position AT in iteration NITER, and
4281    stores it to VAL.  */
4282
4283 static void
4284 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4285                aff_tree *val)
4286 {
4287   aff_tree step, delta, nit;
4288   struct iv *iv = cand->iv;
4289   tree type = TREE_TYPE (iv->base);
4290   tree steptype = type;
4291   if (POINTER_TYPE_P (type))
4292     steptype = sizetype;
4293
4294   tree_to_aff_combination (iv->step, steptype, &step);
4295   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4296   aff_combination_convert (&nit, steptype);
4297   aff_combination_mult (&nit, &step, &delta);
4298   if (stmt_after_increment (loop, cand, at))
4299     aff_combination_add (&delta, &step);
4300
4301   tree_to_aff_combination (iv->base, type, val);
4302   aff_combination_add (val, &delta);
4303 }
4304
4305 /* Returns period of induction variable iv.  */
4306
4307 static tree
4308 iv_period (struct iv *iv)
4309 {
4310   tree step = iv->step, period, type;
4311   tree pow2div;
4312
4313   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4314
4315   type = unsigned_type_for (TREE_TYPE (step));
4316   /* Period of the iv is lcm (step, type_range)/step -1,
4317      i.e., N*type_range/step - 1. Since type range is power
4318      of two, N == (step >> num_of_ending_zeros_binary (step),
4319      so the final result is
4320
4321        (type_range >> num_of_ending_zeros_binary (step)) - 1
4322
4323   */
4324   pow2div = num_ending_zeros (step);
4325
4326   period = build_low_bits_mask (type,
4327                                 (TYPE_PRECISION (type)
4328                                  - tree_low_cst (pow2div, 1)));
4329
4330   return period;
4331 }
4332
4333 /* Returns the comparison operator used when eliminating the iv USE.  */
4334
4335 static enum tree_code
4336 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4337 {
4338   struct loop *loop = data->current_loop;
4339   basic_block ex_bb;
4340   edge exit;
4341
4342   ex_bb = gimple_bb (use->stmt);
4343   exit = EDGE_SUCC (ex_bb, 0);
4344   if (flow_bb_inside_loop_p (loop, exit->dest))
4345     exit = EDGE_SUCC (ex_bb, 1);
4346
4347   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4348 }
4349
4350 static tree
4351 strip_wrap_conserving_type_conversions (tree exp)
4352 {
4353   while (tree_ssa_useless_type_conversion (exp)
4354          && (nowrap_type_p (TREE_TYPE (exp))
4355              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4356     exp = TREE_OPERAND (exp, 0);
4357   return exp;
4358 }
4359
4360 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4361    check for an exact match.  */
4362
4363 static bool
4364 expr_equal_p (tree e, tree what)
4365 {
4366   gimple stmt;
4367   enum tree_code code;
4368
4369   e = strip_wrap_conserving_type_conversions (e);
4370   what = strip_wrap_conserving_type_conversions (what);
4371
4372   code = TREE_CODE (what);
4373   if (TREE_TYPE (e) != TREE_TYPE (what))
4374     return false;
4375
4376   if (operand_equal_p (e, what, 0))
4377     return true;
4378
4379   if (TREE_CODE (e) != SSA_NAME)
4380     return false;
4381
4382   stmt = SSA_NAME_DEF_STMT (e);
4383   if (gimple_code (stmt) != GIMPLE_ASSIGN
4384       || gimple_assign_rhs_code (stmt) != code)
4385     return false;
4386
4387   switch (get_gimple_rhs_class (code))
4388     {
4389     case GIMPLE_BINARY_RHS:
4390       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4391         return false;
4392       /* Fallthru.  */
4393
4394     case GIMPLE_UNARY_RHS:
4395     case GIMPLE_SINGLE_RHS:
4396       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4397     default:
4398       return false;
4399     }
4400 }
4401
4402 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4403    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4404    calculation is performed in non-wrapping type.
4405
4406    TODO: More generally, we could test for the situation that
4407          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4408          This would require knowing the sign of OFFSET.
4409
4410          Also, we only look for the first addition in the computation of BASE.
4411          More complex analysis would be better, but introducing it just for
4412          this optimization seems like an overkill.  */
4413
4414 static bool
4415 difference_cannot_overflow_p (tree base, tree offset)
4416 {
4417   enum tree_code code;
4418   tree e1, e2;
4419
4420   if (!nowrap_type_p (TREE_TYPE (base)))
4421     return false;
4422
4423   base = expand_simple_operations (base);
4424
4425   if (TREE_CODE (base) == SSA_NAME)
4426     {
4427       gimple stmt = SSA_NAME_DEF_STMT (base);
4428
4429       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4430         return false;
4431
4432       code = gimple_assign_rhs_code (stmt);
4433       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4434         return false;
4435
4436       e1 = gimple_assign_rhs1 (stmt);
4437       e2 = gimple_assign_rhs2 (stmt);
4438     }
4439   else
4440     {
4441       code = TREE_CODE (base);
4442       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4443         return false;
4444       e1 = TREE_OPERAND (base, 0);
4445       e2 = TREE_OPERAND (base, 1);
4446     }
4447
4448   /* TODO: deeper inspection may be necessary to prove the equality.  */
4449   switch (code)
4450     {
4451     case PLUS_EXPR:
4452       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4453     case POINTER_PLUS_EXPR:
4454       return expr_equal_p (e2, offset);
4455
4456     default:
4457       return false;
4458     }
4459 }
4460
4461 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4462    comparison with CAND.  NITER describes the number of iterations of
4463    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4464
4465    We aim to handle the following situation:
4466
4467    sometype *base, *p;
4468    int a, b, i;
4469
4470    i = a;
4471    p = p_0 = base + a;
4472
4473    do
4474      {
4475        bla (*p);
4476        p++;
4477        i++;
4478      }
4479    while (i < b);
4480
4481    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4482    We aim to optimize this to
4483
4484    p = p_0 = base + a;
4485    do
4486      {
4487        bla (*p);
4488        p++;
4489      }
4490    while (p < p_0 - a + b);
4491
4492    This preserves the correctness, since the pointer arithmetics does not
4493    overflow.  More precisely:
4494
4495    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4496       overflow in computing it or the values of p.
4497    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4498       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4499
4500 static bool
4501 iv_elimination_compare_lt (struct ivopts_data *data,
4502                            struct iv_cand *cand, enum tree_code *comp_p,
4503                            struct tree_niter_desc *niter)
4504 {
4505   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4506   struct affine_tree_combination nit, tmpa, tmpb;
4507   enum tree_code comp;
4508   HOST_WIDE_INT step;
4509
4510   /* We need to know that the candidate induction variable does not overflow.
4511      While more complex analysis may be used to prove this, for now just
4512      check that the variable appears in the original program and that it
4513      is computed in a type that guarantees no overflows.  */
4514   cand_type = TREE_TYPE (cand->iv->base);
4515   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4516     return false;
4517
4518   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4519      the calculation of the BOUND could overflow, making the comparison
4520      invalid.  */
4521   if (!data->loop_single_exit_p)
4522     return false;
4523
4524   /* We need to be able to decide whether candidate is increasing or decreasing
4525      in order to choose the right comparison operator.  */
4526   if (!cst_and_fits_in_hwi (cand->iv->step))
4527     return false;
4528   step = int_cst_value (cand->iv->step);
4529
4530   /* Check that the number of iterations matches the expected pattern:
4531      a + 1 > b ? 0 : b - a - 1.  */
4532   mbz = niter->may_be_zero;
4533   if (TREE_CODE (mbz) == GT_EXPR)
4534     {
4535       /* Handle a + 1 > b.  */
4536       tree op0 = TREE_OPERAND (mbz, 0);
4537       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4538         {
4539           a = TREE_OPERAND (op0, 0);
4540           b = TREE_OPERAND (mbz, 1);
4541         }
4542       else
4543         return false;
4544     }
4545   else if (TREE_CODE (mbz) == LT_EXPR)
4546     {
4547       tree op1 = TREE_OPERAND (mbz, 1);
4548
4549       /* Handle b < a + 1.  */
4550       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4551         {
4552           a = TREE_OPERAND (op1, 0);
4553           b = TREE_OPERAND (mbz, 0);
4554         }
4555       else
4556         return false;
4557     }
4558   else
4559     return false;
4560
4561   /* Expected number of iterations is B - A - 1.  Check that it matches
4562      the actual number, i.e., that B - A - NITER = 1.  */
4563   tree_to_aff_combination (niter->niter, nit_type, &nit);
4564   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4565   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4566   aff_combination_scale (&nit, double_int_minus_one);
4567   aff_combination_scale (&tmpa, double_int_minus_one);
4568   aff_combination_add (&tmpb, &tmpa);
4569   aff_combination_add (&tmpb, &nit);
4570   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4571     return false;
4572
4573   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4574      overflow.  */
4575   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4576                         cand->iv->step,
4577                         fold_convert (TREE_TYPE (cand->iv->step), a));
4578   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4579     return false;
4580
4581   /* Determine the new comparison operator.  */
4582   comp = step < 0 ? GT_EXPR : LT_EXPR;
4583   if (*comp_p == NE_EXPR)
4584     *comp_p = comp;
4585   else if (*comp_p == EQ_EXPR)
4586     *comp_p = invert_tree_comparison (comp, false);
4587   else
4588     gcc_unreachable ();
4589
4590   return true;
4591 }
4592
4593 /* Check whether it is possible to express the condition in USE by comparison
4594    of candidate CAND.  If so, store the value compared with to BOUND, and the
4595    comparison operator to COMP.  */
4596
4597 static bool
4598 may_eliminate_iv (struct ivopts_data *data,
4599                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4600                   enum tree_code *comp)
4601 {
4602   basic_block ex_bb;
4603   edge exit;
4604   tree period;
4605   struct loop *loop = data->current_loop;
4606   aff_tree bnd;
4607   struct tree_niter_desc *desc = NULL;
4608
4609   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4610     return false;
4611
4612   /* For now works only for exits that dominate the loop latch.
4613      TODO: extend to other conditions inside loop body.  */
4614   ex_bb = gimple_bb (use->stmt);
4615   if (use->stmt != last_stmt (ex_bb)
4616       || gimple_code (use->stmt) != GIMPLE_COND
4617       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4618     return false;
4619
4620   exit = EDGE_SUCC (ex_bb, 0);
4621   if (flow_bb_inside_loop_p (loop, exit->dest))
4622     exit = EDGE_SUCC (ex_bb, 1);
4623   if (flow_bb_inside_loop_p (loop, exit->dest))
4624     return false;
4625
4626   desc = niter_for_exit (data, exit);
4627   if (!desc)
4628     return false;
4629
4630   /* Determine whether we can use the variable to test the exit condition.
4631      This is the case iff the period of the induction variable is greater
4632      than the number of iterations for which the exit condition is true.  */
4633   period = iv_period (cand->iv);
4634
4635   /* If the number of iterations is constant, compare against it directly.  */
4636   if (TREE_CODE (desc->niter) == INTEGER_CST)
4637     {
4638       /* See cand_value_at.  */
4639       if (stmt_after_increment (loop, cand, use->stmt))
4640         {
4641           if (!tree_int_cst_lt (desc->niter, period))
4642             return false;
4643         }
4644       else
4645         {
4646           if (tree_int_cst_lt (period, desc->niter))
4647             return false;
4648         }
4649     }
4650
4651   /* If not, and if this is the only possible exit of the loop, see whether
4652      we can get a conservative estimate on the number of iterations of the
4653      entire loop and compare against that instead.  */
4654   else
4655     {
4656       double_int period_value, max_niter;
4657
4658       max_niter = desc->max;
4659       if (stmt_after_increment (loop, cand, use->stmt))
4660         max_niter += double_int_one;
4661       period_value = tree_to_double_int (period);
4662       if (max_niter.ugt (period_value))
4663         {
4664           /* See if we can take advantage of inferred loop bound information.  */
4665           if (data->loop_single_exit_p)
4666             {
4667               if (!max_loop_iterations (loop, &max_niter))
4668                 return false;
4669               /* The loop bound is already adjusted by adding 1.  */
4670               if (max_niter.ugt (period_value))
4671                 return false;
4672             }
4673           else
4674             return false;
4675         }
4676     }
4677
4678   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4679
4680   *bound = aff_combination_to_tree (&bnd);
4681   *comp = iv_elimination_compare (data, use);
4682
4683   /* It is unlikely that computing the number of iterations using division
4684      would be more profitable than keeping the original induction variable.  */
4685   if (expression_expensive_p (*bound))
4686     return false;
4687
4688   /* Sometimes, it is possible to handle the situation that the number of
4689      iterations may be zero unless additional assumtions by using <
4690      instead of != in the exit condition.
4691
4692      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4693            base the exit condition on it.  However, that is often too
4694            expensive.  */
4695   if (!integer_zerop (desc->may_be_zero))
4696     return iv_elimination_compare_lt (data, cand, comp, desc);
4697
4698   return true;
4699 }
4700
4701  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4702     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4703
4704 static int
4705 parm_decl_cost (struct ivopts_data *data, tree bound)
4706 {
4707   tree sbound = bound;
4708   STRIP_NOPS (sbound);
4709
4710   if (TREE_CODE (sbound) == SSA_NAME
4711       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4712       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4713       && data->body_includes_call)
4714     return COSTS_N_INSNS (1);
4715
4716   return 0;
4717 }
4718
/* Determines cost of basing replacement of USE on CAND in a condition.

   Two alternatives are costed:

     -- eliminating the original iv, i.e. comparing CAND against a new
        bound, when may_eliminate_iv says this is possible;
     -- expressing the original iv in terms of CAND and keeping the
        original bound.

   The cheaper alternative (elimination wins ties) is recorded through
   set_use_iv_cost together with its dependences, bound, comparison code
   and invariant expression id.  Returns false if the recorded cost is
   infinite.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* A bound that costs nothing to compute may still be charged by
         parm_decl_cost; a bound that is an INTEGER_CST is made free.  */
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  /* NOTE(review): fd_ivopts_data is presumably how the find_depends
     callback gets at DATA during the walk below -- confirm at its
     definition.  The walk adds to depends_on_express.  */
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  /* Same parameter/constant adjustment as for the elimination bound
     above.  */
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV. */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      /* Reset the local pointer so that the chosen bitmap is not released
         at the end of the function.  */
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      /* Likewise for the chosen bitmap of this alternative.  */
      depends_on_express = NULL;
      /* No new bound or comparison code is recorded when the iv is only
         expressed, not eliminated.  */
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  /* Only the bitmap of the rejected alternative can still be non-NULL
     here.  */
  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
4827
4828 /* Determines cost of basing replacement of USE on CAND.  Returns false
4829    if USE cannot be based on CAND.  */
4830
4831 static bool
4832 determine_use_iv_cost (struct ivopts_data *data,
4833                        struct iv_use *use, struct iv_cand *cand)
4834 {
4835   switch (use->type)
4836     {
4837     case USE_NONLINEAR_EXPR:
4838       return determine_use_iv_cost_generic (data, use, cand);
4839
4840     case USE_ADDRESS:
4841       return determine_use_iv_cost_address (data, use, cand);
4842
4843     case USE_COMPARE:
4844       return determine_use_iv_cost_condition (data, use, cand);
4845
4846     default:
4847       gcc_unreachable ();
4848     }
4849 }
4850
4851 /* Return true if get_computation_cost indicates that autoincrement is
4852    a possibility for the pair of USE and CAND, false otherwise.  */
4853
4854 static bool
4855 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4856                            struct iv_cand *cand)
4857 {
4858   bitmap depends_on;
4859   bool can_autoinc;
4860   comp_cost cost;
4861
4862   if (use->type != USE_ADDRESS)
4863     return false;
4864
4865   cost = get_computation_cost (data, use, cand, true, &depends_on,
4866                                &can_autoinc, NULL);
4867
4868   BITMAP_FREE (depends_on);
4869
4870   return !infinite_cost_p (cost) && can_autoinc;
4871 }
4872
4873 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4874    use that allows autoincrement, and set their AINC_USE if possible.  */
4875
4876 static void
4877 set_autoinc_for_original_candidates (struct ivopts_data *data)
4878 {
4879   unsigned i, j;
4880
4881   for (i = 0; i < n_iv_cands (data); i++)
4882     {
4883       struct iv_cand *cand = iv_cand (data, i);
4884       struct iv_use *closest_before = NULL;
4885       struct iv_use *closest_after = NULL;
4886       if (cand->pos != IP_ORIGINAL)
4887         continue;
4888
4889       for (j = 0; j < n_iv_uses (data); j++)
4890         {
4891           struct iv_use *use = iv_use (data, j);
4892           unsigned uid = gimple_uid (use->stmt);
4893
4894           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4895             continue;
4896
4897           if (uid < gimple_uid (cand->incremented_at)
4898               && (closest_before == NULL
4899                   || uid > gimple_uid (closest_before->stmt)))
4900             closest_before = use;
4901
4902           if (uid > gimple_uid (cand->incremented_at)
4903               && (closest_after == NULL
4904                   || uid < gimple_uid (closest_after->stmt)))
4905             closest_after = use;
4906         }
4907
4908       if (closest_before != NULL
4909           && autoinc_possible_for_pair (data, closest_before, cand))
4910         cand->ainc_use = closest_before;
4911       else if (closest_after != NULL
4912                && autoinc_possible_for_pair (data, closest_after, cand))
4913         cand->ainc_use = closest_after;
4914     }
4915 }
4916
/* Finds the candidates for the induction variables.  The candidate sources
   are processed in the order below; afterwards the autoincrement use of
   the IP_ORIGINAL candidates is set up and the important candidates are
   recorded.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Set AINC_USE of the IP_ORIGINAL candidates where autoincrement is
     possible.  */
  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4936
4937 /* Determines costs of basing the use of the iv on an iv candidate.  */
4938
4939 static void
4940 determine_use_iv_costs (struct ivopts_data *data)
4941 {
4942   unsigned i, j;
4943   struct iv_use *use;
4944   struct iv_cand *cand;
4945   bitmap to_clear = BITMAP_ALLOC (NULL);
4946
4947   alloc_use_cost_map (data);
4948
4949   for (i = 0; i < n_iv_uses (data); i++)
4950     {
4951       use = iv_use (data, i);
4952
4953       if (data->consider_all_candidates)
4954         {
4955           for (j = 0; j < n_iv_cands (data); j++)
4956             {
4957               cand = iv_cand (data, j);
4958               determine_use_iv_cost (data, use, cand);
4959             }
4960         }
4961       else
4962         {
4963           bitmap_iterator bi;
4964
4965           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4966             {
4967               cand = iv_cand (data, j);
4968               if (!determine_use_iv_cost (data, use, cand))
4969                 bitmap_set_bit (to_clear, j);
4970             }
4971
4972           /* Remove the candidates for that the cost is infinite from
4973              the list of related candidates.  */
4974           bitmap_and_compl_into (use->related_cands, to_clear);
4975           bitmap_clear (to_clear);
4976         }
4977     }
4978
4979   BITMAP_FREE (to_clear);
4980
4981   if (dump_file && (dump_flags & TDF_DETAILS))
4982     {
4983       fprintf (dump_file, "Use-candidate costs:\n");
4984
4985       for (i = 0; i < n_iv_uses (data); i++)
4986         {
4987           use = iv_use (data, i);
4988
4989           fprintf (dump_file, "Use %d:\n", i);
4990           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4991           for (j = 0; j < use->n_map_members; j++)
4992             {
4993               if (!use->cost_map[j].cand
4994                   || infinite_cost_p (use->cost_map[j].cost))
4995                 continue;
4996
4997               fprintf (dump_file, "  %d\t%d\t%d\t",
4998                        use->cost_map[j].cand->id,
4999                        use->cost_map[j].cost.cost,
5000                        use->cost_map[j].cost.complexity);
5001               if (use->cost_map[j].depends_on)
5002                 bitmap_print (dump_file,
5003                               use->cost_map[j].depends_on, "","");
5004               if (use->cost_map[j].inv_expr_id != -1)
5005                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5006               fprintf (dump_file, "\n");
5007             }
5008
5009           fprintf (dump_file, "\n");
5010         }
5011       fprintf (dump_file, "\n");
5012     }
5013 }
5014
5015 /* Determines cost of the candidate CAND.  */
5016
5017 static void
5018 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5019 {
5020   comp_cost cost_base;
5021   unsigned cost, cost_step;
5022   tree base;
5023
5024   if (!cand->iv)
5025     {
5026       cand->cost = 0;
5027       return;
5028     }
5029
5030   /* There are two costs associated with the candidate -- its increment
5031      and its initialization.  The second is almost negligible for any loop
5032      that rolls enough, so we take it just very little into account.  */
5033
5034   base = cand->iv->base;
5035   cost_base = force_var_cost (data, base, NULL);
5036   /* It will be exceptional that the iv register happens to be initialized with
5037      the proper value at no cost.  In general, there will at least be a regcopy
5038      or a const set.  */
5039   if (cost_base.cost == 0)
5040     cost_base.cost = COSTS_N_INSNS (1);
5041   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5042
5043   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5044
5045   /* Prefer the original ivs unless we may gain something by replacing it.
5046      The reason is to make debugging simpler; so this is not relevant for
5047      artificial ivs created by other optimization passes.  */
5048   if (cand->pos != IP_ORIGINAL
5049       || !SSA_NAME_VAR (cand->var_before)
5050       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5051     cost++;
5052
5053   /* Prefer not to insert statements into latch unless there are some
5054      already (so that we do not create unnecessary jumps).  */
5055   if (cand->pos == IP_END
5056       && empty_block_p (ip_end_pos (data->current_loop)))
5057     cost++;
5058
5059   cand->cost = cost;
5060   cand->cost_step = cost_step;
5061 }
5062
5063 /* Determines costs of computation of the candidates.  */
5064
5065 static void
5066 determine_iv_costs (struct ivopts_data *data)
5067 {
5068   unsigned i;
5069
5070   if (dump_file && (dump_flags & TDF_DETAILS))
5071     {
5072       fprintf (dump_file, "Candidate costs:\n");
5073       fprintf (dump_file, "  cand\tcost\n");
5074     }
5075
5076   for (i = 0; i < n_iv_cands (data); i++)
5077     {
5078       struct iv_cand *cand = iv_cand (data, i);
5079
5080       determine_iv_cost (data, cand);
5081
5082       if (dump_file && (dump_flags & TDF_DETAILS))
5083         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5084     }
5085
5086   if (dump_file && (dump_flags & TDF_DETAILS))
5087     fprintf (dump_file, "\n");
5088 }
5089
5090 /* Calculates cost for having SIZE induction variables.  */
5091
5092 static unsigned
5093 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5094 {
5095   /* We add size to the cost, so that we prefer eliminating ivs
5096      if possible.  */
5097   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5098                                             data->body_includes_call);
5099 }
5100
5101 /* For each size of the induction variable set determine the penalty.  */
5102
5103 static void
5104 determine_set_costs (struct ivopts_data *data)
5105 {
5106   unsigned j, n;
5107   gimple phi;
5108   gimple_stmt_iterator psi;
5109   tree op;
5110   struct loop *loop = data->current_loop;
5111   bitmap_iterator bi;
5112
5113   if (dump_file && (dump_flags & TDF_DETAILS))
5114     {
5115       fprintf (dump_file, "Global costs:\n");
5116       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5117       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5118       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5119       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5120     }
5121
5122   n = 0;
5123   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5124     {
5125       phi = gsi_stmt (psi);
5126       op = PHI_RESULT (phi);
5127
5128       if (virtual_operand_p (op))
5129         continue;
5130
5131       if (get_iv (data, op))
5132         continue;
5133
5134       n++;
5135     }
5136
5137   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5138     {
5139       struct version_info *info = ver_info (data, j);
5140
5141       if (info->inv_id && info->has_nonlin_use)
5142         n++;
5143     }
5144
5145   data->regs_used = n;
5146   if (dump_file && (dump_flags & TDF_DETAILS))
5147     fprintf (dump_file, "  regs_used %d\n", n);
5148
5149   if (dump_file && (dump_flags & TDF_DETAILS))
5150     {
5151       fprintf (dump_file, "  cost for size:\n");
5152       fprintf (dump_file, "  ivs\tcost\n");
5153       for (j = 0; j <= 2 * target_avail_regs; j++)
5154         fprintf (dump_file, "  %d\t%d\n", j,
5155                  ivopts_global_cost_for_size (data, j));
5156       fprintf (dump_file, "\n");
5157     }
5158 }
5159
5160 /* Returns true if A is a cheaper cost pair than B.  */
5161
5162 static bool
5163 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5164 {
5165   int cmp;
5166
5167   if (!a)
5168     return false;
5169
5170   if (!b)
5171     return true;
5172
5173   cmp = compare_costs (a->cost, b->cost);
5174   if (cmp < 0)
5175     return true;
5176
5177   if (cmp > 0)
5178     return false;
5179
5180   /* In case the costs are the same, prefer the cheaper candidate.  */
5181   if (a->cand->cost < b->cand->cost)
5182     return true;
5183
5184   return false;
5185 }
5186
5187
5188 /* Returns candidate by that USE is expressed in IVS.  */
5189
5190 static struct cost_pair *
5191 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5192 {
5193   return ivs->cand_for_use[use->id];
5194 }
5195
5196 /* Computes the cost field of IVS structure.  */
5197
5198 static void
5199 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5200 {
5201   comp_cost cost = ivs->cand_use_cost;
5202
5203   cost.cost += ivs->cand_cost;
5204
5205   cost.cost += ivopts_global_cost_for_size (data,
5206                                             ivs->n_regs + ivs->num_used_inv_expr);
5207
5208   ivs->cost = cost;
5209 }
5210
5211 /* Remove invariants in set INVS to set IVS.  */
5212
5213 static void
5214 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5215 {
5216   bitmap_iterator bi;
5217   unsigned iid;
5218
5219   if (!invs)
5220     return;
5221
5222   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5223     {
5224       ivs->n_invariant_uses[iid]--;
5225       if (ivs->n_invariant_uses[iid] == 0)
5226         ivs->n_regs--;
5227     }
5228 }
5229
5230 /* Set USE not to be expressed by any candidate in IVS.  */
5231
5232 static void
5233 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5234                  struct iv_use *use)
5235 {
5236   unsigned uid = use->id, cid;
5237   struct cost_pair *cp;
5238
5239   cp = ivs->cand_for_use[uid];
5240   if (!cp)
5241     return;
5242   cid = cp->cand->id;
5243
5244   ivs->bad_uses++;
5245   ivs->cand_for_use[uid] = NULL;
5246   ivs->n_cand_uses[cid]--;
5247
5248   if (ivs->n_cand_uses[cid] == 0)
5249     {
5250       bitmap_clear_bit (ivs->cands, cid);
5251       /* Do not count the pseudocandidates.  */
5252       if (cp->cand->iv)
5253         ivs->n_regs--;
5254       ivs->n_cands--;
5255       ivs->cand_cost -= cp->cand->cost;
5256
5257       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5258     }
5259
5260   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5261
5262   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5263
5264   if (cp->inv_expr_id != -1)
5265     {
5266       ivs->used_inv_expr[cp->inv_expr_id]--;
5267       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5268         ivs->num_used_inv_expr--;
5269     }
5270   iv_ca_recount_cost (data, ivs);
5271 }
5272
5273 /* Add invariants in set INVS to set IVS.  */
5274
5275 static void
5276 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5277 {
5278   bitmap_iterator bi;
5279   unsigned iid;
5280
5281   if (!invs)
5282     return;
5283
5284   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5285     {
5286       ivs->n_invariant_uses[iid]++;
5287       if (ivs->n_invariant_uses[iid] == 1)
5288         ivs->n_regs++;
5289     }
5290 }
5291
5292 /* Set cost pair for USE in set IVS to CP.  */
5293
5294 static void
5295 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5296               struct iv_use *use, struct cost_pair *cp)
5297 {
5298   unsigned uid = use->id, cid;
5299
5300   if (ivs->cand_for_use[uid] == cp)
5301     return;
5302
5303   if (ivs->cand_for_use[uid])
5304     iv_ca_set_no_cp (data, ivs, use);
5305
5306   if (cp)
5307     {
5308       cid = cp->cand->id;
5309
5310       ivs->bad_uses--;
5311       ivs->cand_for_use[uid] = cp;
5312       ivs->n_cand_uses[cid]++;
5313       if (ivs->n_cand_uses[cid] == 1)
5314         {
5315           bitmap_set_bit (ivs->cands, cid);
5316           /* Do not count the pseudocandidates.  */
5317           if (cp->cand->iv)
5318             ivs->n_regs++;
5319           ivs->n_cands++;
5320           ivs->cand_cost += cp->cand->cost;
5321
5322           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5323         }
5324
5325       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5326       iv_ca_set_add_invariants (ivs, cp->depends_on);
5327
5328       if (cp->inv_expr_id != -1)
5329         {
5330           ivs->used_inv_expr[cp->inv_expr_id]++;
5331           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5332             ivs->num_used_inv_expr++;
5333         }
5334       iv_ca_recount_cost (data, ivs);
5335     }
5336 }
5337
5338 /* Extend set IVS by expressing USE by some of the candidates in it
5339    if possible. All important candidates will be considered
5340    if IMPORTANT_CANDIDATES is true.  */
5341
5342 static void
5343 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5344                struct iv_use *use, bool important_candidates)
5345 {
5346   struct cost_pair *best_cp = NULL, *cp;
5347   bitmap_iterator bi;
5348   bitmap cands;
5349   unsigned i;
5350
5351   gcc_assert (ivs->upto >= use->id);
5352
5353   if (ivs->upto == use->id)
5354     {
5355       ivs->upto++;
5356       ivs->bad_uses++;
5357     }
5358
5359   cands = (important_candidates ? data->important_candidates : ivs->cands);
5360   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5361     {
5362       struct iv_cand *cand = iv_cand (data, i);
5363
5364       cp = get_use_iv_cost (data, use, cand);
5365
5366       if (cheaper_cost_pair (cp, best_cp))
5367         best_cp = cp;
5368     }
5369
5370   iv_ca_set_cp (data, ivs, use, best_cp);
5371 }
5372
5373 /* Get cost for assignment IVS.  */
5374
5375 static comp_cost
5376 iv_ca_cost (struct iv_ca *ivs)
5377 {
5378   /* This was a conditional expression but it triggered a bug in
5379      Sun C 5.5.  */
5380   if (ivs->bad_uses)
5381     return infinite_cost;
5382   else
5383     return ivs->cost;
5384 }
5385
5386 /* Returns true if all dependences of CP are among invariants in IVS.  */
5387
5388 static bool
5389 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5390 {
5391   unsigned i;
5392   bitmap_iterator bi;
5393
5394   if (!cp->depends_on)
5395     return true;
5396
5397   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5398     {
5399       if (ivs->n_invariant_uses[i] == 0)
5400         return false;
5401     }
5402
5403   return true;
5404 }
5405
5406 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5407    it before NEXT_CHANGE.  */
5408
5409 static struct iv_ca_delta *
5410 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5411                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5412 {
5413   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5414
5415   change->use = use;
5416   change->old_cp = old_cp;
5417   change->new_cp = new_cp;
5418   change->next_change = next_change;
5419
5420   return change;
5421 }
5422
5423 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5424    are rewritten.  */
5425
5426 static struct iv_ca_delta *
5427 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5428 {
5429   struct iv_ca_delta *last;
5430
5431   if (!l2)
5432     return l1;
5433
5434   if (!l1)
5435     return l2;
5436
5437   for (last = l1; last->next_change; last = last->next_change)
5438     continue;
5439   last->next_change = l2;
5440
5441   return l1;
5442 }
5443
5444 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5445
5446 static struct iv_ca_delta *
5447 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5448 {
5449   struct iv_ca_delta *act, *next, *prev = NULL;
5450   struct cost_pair *tmp;
5451
5452   for (act = delta; act; act = next)
5453     {
5454       next = act->next_change;
5455       act->next_change = prev;
5456       prev = act;
5457
5458       tmp = act->old_cp;
5459       act->old_cp = act->new_cp;
5460       act->new_cp = tmp;
5461     }
5462
5463   return prev;
5464 }
5465
5466 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5467    reverted instead.  */
5468
5469 static void
5470 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5471                     struct iv_ca_delta *delta, bool forward)
5472 {
5473   struct cost_pair *from, *to;
5474   struct iv_ca_delta *act;
5475
5476   if (!forward)
5477     delta = iv_ca_delta_reverse (delta);
5478
5479   for (act = delta; act; act = act->next_change)
5480     {
5481       from = act->old_cp;
5482       to = act->new_cp;
5483       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5484       iv_ca_set_cp (data, ivs, act->use, to);
5485     }
5486
5487   if (!forward)
5488     iv_ca_delta_reverse (delta);
5489 }
5490
5491 /* Returns true if CAND is used in IVS.  */
5492
5493 static bool
5494 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5495 {
5496   return ivs->n_cand_uses[cand->id] > 0;
5497 }
5498
5499 /* Returns number of induction variable candidates in the set IVS.  */
5500
5501 static unsigned
5502 iv_ca_n_cands (struct iv_ca *ivs)
5503 {
5504   return ivs->n_cands;
5505 }
5506
5507 /* Free the list of changes DELTA.  */
5508
5509 static void
5510 iv_ca_delta_free (struct iv_ca_delta **delta)
5511 {
5512   struct iv_ca_delta *act, *next;
5513
5514   for (act = *delta; act; act = next)
5515     {
5516       next = act->next_change;
5517       free (act);
5518     }
5519
5520   *delta = NULL;
5521 }
5522
5523 /* Allocates new iv candidates assignment.  */
5524
5525 static struct iv_ca *
5526 iv_ca_new (struct ivopts_data *data)
5527 {
5528   struct iv_ca *nw = XNEW (struct iv_ca);
5529
5530   nw->upto = 0;
5531   nw->bad_uses = 0;
5532   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5533   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5534   nw->cands = BITMAP_ALLOC (NULL);
5535   nw->n_cands = 0;
5536   nw->n_regs = 0;
5537   nw->cand_use_cost = no_cost;
5538   nw->cand_cost = 0;
5539   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5540   nw->cost = no_cost;
5541   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5542   nw->num_used_inv_expr = 0;
5543
5544   return nw;
5545 }
5546
5547 /* Free memory occupied by the set IVS.  */
5548
5549 static void
5550 iv_ca_free (struct iv_ca **ivs)
5551 {
5552   free ((*ivs)->cand_for_use);
5553   free ((*ivs)->n_cand_uses);
5554   BITMAP_FREE ((*ivs)->cands);
5555   free ((*ivs)->n_invariant_uses);
5556   free ((*ivs)->used_inv_expr);
5557   free (*ivs);
5558   *ivs = NULL;
5559 }
5560
5561 /* Dumps IVS to FILE.  */
5562
5563 static void
5564 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5565 {
5566   const char *pref = "  invariants ";
5567   unsigned i;
5568   comp_cost cost = iv_ca_cost (ivs);
5569
5570   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5571   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5572            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5573   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5574
5575    for (i = 0; i < ivs->upto; i++)
5576     {
5577       struct iv_use *use = iv_use (data, i);
5578       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5579       if (cp)
5580         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5581                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5582       else
5583         fprintf (file, "   use:%d --> ??\n", use->id);
5584     }
5585
5586   for (i = 1; i <= data->max_inv_id; i++)
5587     if (ivs->n_invariant_uses[i])
5588       {
5589         fprintf (file, "%s%d", pref, i);
5590         pref = ", ";
5591       }
5592   fprintf (file, "\n\n");
5593 }
5594
5595 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5596    new set, and store differences in DELTA.  Number of induction variables
5597    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5598    the function will try to find a solution with mimimal iv candidates.  */
5599
5600 static comp_cost
5601 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5602               struct iv_cand *cand, struct iv_ca_delta **delta,
5603               unsigned *n_ivs, bool min_ncand)
5604 {
5605   unsigned i;
5606   comp_cost cost;
5607   struct iv_use *use;
5608   struct cost_pair *old_cp, *new_cp;
5609
5610   *delta = NULL;
5611   for (i = 0; i < ivs->upto; i++)
5612     {
5613       use = iv_use (data, i);
5614       old_cp = iv_ca_cand_for_use (ivs, use);
5615
5616       if (old_cp
5617           && old_cp->cand == cand)
5618         continue;
5619
5620       new_cp = get_use_iv_cost (data, use, cand);
5621       if (!new_cp)
5622         continue;
5623
5624       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5625         continue;
5626
5627       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5628         continue;
5629
5630       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5631     }
5632
5633   iv_ca_delta_commit (data, ivs, *delta, true);
5634   cost = iv_ca_cost (ivs);
5635   if (n_ivs)
5636     *n_ivs = iv_ca_n_cands (ivs);
5637   iv_ca_delta_commit (data, ivs, *delta, false);
5638
5639   return cost;
5640 }
5641
5642 /* Try narrowing set IVS by removing CAND.  Return the cost of
5643    the new set and store the differences in DELTA.  */
5644
5645 static comp_cost
5646 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5647               struct iv_cand *cand, struct iv_ca_delta **delta)
5648 {
5649   unsigned i, ci;
5650   struct iv_use *use;
5651   struct cost_pair *old_cp, *new_cp, *cp;
5652   bitmap_iterator bi;
5653   struct iv_cand *cnd;
5654   comp_cost cost;
5655
5656   *delta = NULL;
5657   for (i = 0; i < n_iv_uses (data); i++)
5658     {
5659       use = iv_use (data, i);
5660
5661       old_cp = iv_ca_cand_for_use (ivs, use);
5662       if (old_cp->cand != cand)
5663         continue;
5664
5665       new_cp = NULL;
5666
5667       if (data->consider_all_candidates)
5668         {
5669           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5670             {
5671               if (ci == cand->id)
5672                 continue;
5673
5674               cnd = iv_cand (data, ci);
5675
5676               cp = get_use_iv_cost (data, use, cnd);
5677               if (!cp)
5678                 continue;
5679
5680               if (!iv_ca_has_deps (ivs, cp))
5681                 continue;
5682
5683               if (!cheaper_cost_pair (cp, new_cp))
5684                 continue;
5685
5686               new_cp = cp;
5687             }
5688         }
5689       else
5690         {
5691           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5692             {
5693               if (ci == cand->id)
5694                 continue;
5695
5696               cnd = iv_cand (data, ci);
5697
5698               cp = get_use_iv_cost (data, use, cnd);
5699               if (!cp)
5700                 continue;
5701               if (!iv_ca_has_deps (ivs, cp))
5702                 continue;
5703
5704               if (!cheaper_cost_pair (cp, new_cp))
5705                 continue;
5706
5707               new_cp = cp;
5708             }
5709         }
5710
5711       if (!new_cp)
5712         {
5713           iv_ca_delta_free (delta);
5714           return infinite_cost;
5715         }
5716
5717       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5718     }
5719
5720   iv_ca_delta_commit (data, ivs, *delta, true);
5721   cost = iv_ca_cost (ivs);
5722   iv_ca_delta_commit (data, ivs, *delta, false);
5723
5724   return cost;
5725 }
5726
5727 /* Try optimizing the set of candidates IVS by removing candidates different
5728    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5729    differences in DELTA.  */
5730
5731 static comp_cost
5732 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5733              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5734 {
5735   bitmap_iterator bi;
5736   struct iv_ca_delta *act_delta, *best_delta;
5737   unsigned i;
5738   comp_cost best_cost, acost;
5739   struct iv_cand *cand;
5740
5741   best_delta = NULL;
5742   best_cost = iv_ca_cost (ivs);
5743
5744   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5745     {
5746       cand = iv_cand (data, i);
5747
5748       if (cand == except_cand)
5749         continue;
5750
5751       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5752
5753       if (compare_costs (acost, best_cost) < 0)
5754         {
5755           best_cost = acost;
5756           iv_ca_delta_free (&best_delta);
5757           best_delta = act_delta;
5758         }
5759       else
5760         iv_ca_delta_free (&act_delta);
5761     }
5762
5763   if (!best_delta)
5764     {
5765       *delta = NULL;
5766       return best_cost;
5767     }
5768
5769   /* Recurse to possibly remove other unnecessary ivs.  */
5770   iv_ca_delta_commit (data, ivs, best_delta, true);
5771   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5772   iv_ca_delta_commit (data, ivs, best_delta, false);
5773   *delta = iv_ca_delta_join (best_delta, *delta);
5774   return best_cost;
5775 }
5776
5777 /* Tries to extend the sets IVS in the best possible way in order
5778    to express the USE.  If ORIGINALP is true, prefer candidates from
5779    the original set of IVs, otherwise favor important candidates not
5780    based on any memory object.  */
5781
5782 static bool
5783 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5784                   struct iv_use *use, bool originalp)
5785 {
5786   comp_cost best_cost, act_cost;
5787   unsigned i;
5788   bitmap_iterator bi;
5789   struct iv_cand *cand;
5790   struct iv_ca_delta *best_delta = NULL, *act_delta;
5791   struct cost_pair *cp;
5792
5793   iv_ca_add_use (data, ivs, use, false);
5794   best_cost = iv_ca_cost (ivs);
5795
5796   cp = iv_ca_cand_for_use (ivs, use);
5797   if (!cp)
5798     {
5799       ivs->upto--;
5800       ivs->bad_uses--;
5801       iv_ca_add_use (data, ivs, use, true);
5802       best_cost = iv_ca_cost (ivs);
5803       cp = iv_ca_cand_for_use (ivs, use);
5804     }
5805   if (cp)
5806     {
5807       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5808       iv_ca_set_no_cp (data, ivs, use);
5809     }
5810
5811   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5812      first try important candidates not based on any memory object.  Only if
5813      this fails, try the specific ones.  Rationale -- in loops with many
5814      variables the best choice often is to use just one generic biv.  If we
5815      added here many ivs specific to the uses, the optimization algorithm later
5816      would be likely to get stuck in a local minimum, thus causing us to create
5817      too many ivs.  The approach from few ivs to more seems more likely to be
5818      successful -- starting from few ivs, replacing an expensive use by a
5819      specific iv should always be a win.  */
5820   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5821     {
5822       cand = iv_cand (data, i);
5823
5824       if (originalp && cand->pos !=IP_ORIGINAL)
5825         continue;
5826
5827       if (!originalp && cand->iv->base_object != NULL_TREE)
5828         continue;
5829
5830       if (iv_ca_cand_used_p (ivs, cand))
5831         continue;
5832
5833       cp = get_use_iv_cost (data, use, cand);
5834       if (!cp)
5835         continue;
5836
5837       iv_ca_set_cp (data, ivs, use, cp);
5838       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5839                                true);
5840       iv_ca_set_no_cp (data, ivs, use);
5841       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5842
5843       if (compare_costs (act_cost, best_cost) < 0)
5844         {
5845           best_cost = act_cost;
5846
5847           iv_ca_delta_free (&best_delta);
5848           best_delta = act_delta;
5849         }
5850       else
5851         iv_ca_delta_free (&act_delta);
5852     }
5853
5854   if (infinite_cost_p (best_cost))
5855     {
5856       for (i = 0; i < use->n_map_members; i++)
5857         {
5858           cp = use->cost_map + i;
5859           cand = cp->cand;
5860           if (!cand)
5861             continue;
5862
5863           /* Already tried this.  */
5864           if (cand->important)
5865             {
5866               if (originalp && cand->pos == IP_ORIGINAL)
5867                 continue;
5868               if (!originalp && cand->iv->base_object == NULL_TREE)
5869                 continue;
5870             }
5871
5872           if (iv_ca_cand_used_p (ivs, cand))
5873             continue;
5874
5875           act_delta = NULL;
5876           iv_ca_set_cp (data, ivs, use, cp);
5877           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5878           iv_ca_set_no_cp (data, ivs, use);
5879           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5880                                        cp, act_delta);
5881
5882           if (compare_costs (act_cost, best_cost) < 0)
5883             {
5884               best_cost = act_cost;
5885
5886               if (best_delta)
5887                 iv_ca_delta_free (&best_delta);
5888               best_delta = act_delta;
5889             }
5890           else
5891             iv_ca_delta_free (&act_delta);
5892         }
5893     }
5894
5895   iv_ca_delta_commit (data, ivs, best_delta, true);
5896   iv_ca_delta_free (&best_delta);
5897
5898   return !infinite_cost_p (best_cost);
5899 }
5900
5901 /* Finds an initial assignment of candidates to uses.  */
5902
5903 static struct iv_ca *
5904 get_initial_solution (struct ivopts_data *data, bool originalp)
5905 {
5906   struct iv_ca *ivs = iv_ca_new (data);
5907   unsigned i;
5908
5909   for (i = 0; i < n_iv_uses (data); i++)
5910     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5911       {
5912         iv_ca_free (&ivs);
5913         return NULL;
5914       }
5915
5916   return ivs;
5917 }
5918
5919 /* Tries to improve set of induction variables IVS.  */
5920
5921 static bool
5922 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5923 {
5924   unsigned i, n_ivs;
5925   comp_cost acost, best_cost = iv_ca_cost (ivs);
5926   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5927   struct iv_cand *cand;
5928
5929   /* Try extending the set of induction variables by one.  */
5930   for (i = 0; i < n_iv_cands (data); i++)
5931     {
5932       cand = iv_cand (data, i);
5933
5934       if (iv_ca_cand_used_p (ivs, cand))
5935         continue;
5936
5937       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5938       if (!act_delta)
5939         continue;
5940
5941       /* If we successfully added the candidate and the set is small enough,
5942          try optimizing it by removing other candidates.  */
5943       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5944         {
5945           iv_ca_delta_commit (data, ivs, act_delta, true);
5946           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5947           iv_ca_delta_commit (data, ivs, act_delta, false);
5948           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5949         }
5950
5951       if (compare_costs (acost, best_cost) < 0)
5952         {
5953           best_cost = acost;
5954           iv_ca_delta_free (&best_delta);
5955           best_delta = act_delta;
5956         }
5957       else
5958         iv_ca_delta_free (&act_delta);
5959     }
5960
5961   if (!best_delta)
5962     {
5963       /* Try removing the candidates from the set instead.  */
5964       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5965
5966       /* Nothing more we can do.  */
5967       if (!best_delta)
5968         return false;
5969     }
5970
5971   iv_ca_delta_commit (data, ivs, best_delta, true);
5972   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5973   iv_ca_delta_free (&best_delta);
5974   return true;
5975 }
5976
5977 /* Attempts to find the optimal set of induction variables.  We do simple
5978    greedy heuristic -- we try to replace at most one candidate in the selected
5979    solution and remove the unused ivs while this improves the cost.  */
5980
5981 static struct iv_ca *
5982 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5983 {
5984   struct iv_ca *set;
5985
5986   /* Get the initial solution.  */
5987   set = get_initial_solution (data, originalp);
5988   if (!set)
5989     {
5990       if (dump_file && (dump_flags & TDF_DETAILS))
5991         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5992       return NULL;
5993     }
5994
5995   if (dump_file && (dump_flags & TDF_DETAILS))
5996     {
5997       fprintf (dump_file, "Initial set of candidates:\n");
5998       iv_ca_dump (data, dump_file, set);
5999     }
6000
6001   while (try_improve_iv_set (data, set))
6002     {
6003       if (dump_file && (dump_flags & TDF_DETAILS))
6004         {
6005           fprintf (dump_file, "Improved to:\n");
6006           iv_ca_dump (data, dump_file, set);
6007         }
6008     }
6009
6010   return set;
6011 }
6012
6013 static struct iv_ca *
6014 find_optimal_iv_set (struct ivopts_data *data)
6015 {
6016   unsigned i;
6017   struct iv_ca *set, *origset;
6018   struct iv_use *use;
6019   comp_cost cost, origcost;
6020
6021   /* Determine the cost based on a strategy that starts with original IVs,
6022      and try again using a strategy that prefers candidates not based
6023      on any IVs.  */
6024   origset = find_optimal_iv_set_1 (data, true);
6025   set = find_optimal_iv_set_1 (data, false);
6026
6027   if (!origset && !set)
6028     return NULL;
6029
6030   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6031   cost = set ? iv_ca_cost (set) : infinite_cost;
6032
6033   if (dump_file && (dump_flags & TDF_DETAILS))
6034     {
6035       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6036                origcost.cost, origcost.complexity);
6037       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6038                cost.cost, cost.complexity);
6039     }
6040
6041   /* Choose the one with the best cost.  */
6042   if (compare_costs (origcost, cost) <= 0)
6043     {
6044       if (set)
6045         iv_ca_free (&set);
6046       set = origset;
6047     }
6048   else if (origset)
6049     iv_ca_free (&origset);
6050
6051   for (i = 0; i < n_iv_uses (data); i++)
6052     {
6053       use = iv_use (data, i);
6054       use->selected = iv_ca_cand_for_use (set, use)->cand;
6055     }
6056
6057   return set;
6058 }
6059
6060 /* Creates a new induction variable corresponding to CAND.  */
6061
6062 static void
6063 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6064 {
6065   gimple_stmt_iterator incr_pos;
6066   tree base;
6067   bool after = false;
6068
6069   if (!cand->iv)
6070     return;
6071
6072   switch (cand->pos)
6073     {
6074     case IP_NORMAL:
6075       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6076       break;
6077
6078     case IP_END:
6079       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6080       after = true;
6081       break;
6082
6083     case IP_AFTER_USE:
6084       after = true;
6085       /* fall through */
6086     case IP_BEFORE_USE:
6087       incr_pos = gsi_for_stmt (cand->incremented_at);
6088       break;
6089
6090     case IP_ORIGINAL:
6091       /* Mark that the iv is preserved.  */
6092       name_info (data, cand->var_before)->preserve_biv = true;
6093       name_info (data, cand->var_after)->preserve_biv = true;
6094
6095       /* Rewrite the increment so that it uses var_before directly.  */
6096       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6097       return;
6098     }
6099
6100   gimple_add_tmp_var (cand->var_before);
6101
6102   base = unshare_expr (cand->iv->base);
6103
6104   create_iv (base, unshare_expr (cand->iv->step),
6105              cand->var_before, data->current_loop,
6106              &incr_pos, after, &cand->var_before, &cand->var_after);
6107 }
6108
6109 /* Creates new induction variables described in SET.  */
6110
6111 static void
6112 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6113 {
6114   unsigned i;
6115   struct iv_cand *cand;
6116   bitmap_iterator bi;
6117
6118   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6119     {
6120       cand = iv_cand (data, i);
6121       create_new_iv (data, cand);
6122     }
6123
6124   if (dump_file && (dump_flags & TDF_DETAILS))
6125     {
6126       fprintf (dump_file, "\nSelected IV set: \n");
6127       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6128         {
6129           cand = iv_cand (data, i);
6130           dump_cand (dump_file, cand);
6131         }
6132       fprintf (dump_file, "\n");
6133     }
6134 }
6135
6136 /* Rewrites USE (definition of iv used in a nonlinear expression)
6137    using candidate CAND.  */
6138
6139 static void
6140 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6141                             struct iv_use *use, struct iv_cand *cand)
6142 {
6143   tree comp;
6144   tree op, tgt;
6145   gimple ass;
6146   gimple_stmt_iterator bsi;
6147
6148   /* An important special case -- if we are asked to express value of
6149      the original iv by itself, just exit; there is no need to
6150      introduce a new computation (that might also need casting the
6151      variable to unsigned and back).  */
6152   if (cand->pos == IP_ORIGINAL
6153       && cand->incremented_at == use->stmt)
6154     {
6155       enum tree_code stmt_code;
6156
6157       gcc_assert (is_gimple_assign (use->stmt));
6158       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6159
6160       /* Check whether we may leave the computation unchanged.
6161          This is the case only if it does not rely on other
6162          computations in the loop -- otherwise, the computation
6163          we rely upon may be removed in remove_unused_ivs,
6164          thus leading to ICE.  */
6165       stmt_code = gimple_assign_rhs_code (use->stmt);
6166       if (stmt_code == PLUS_EXPR
6167           || stmt_code == MINUS_EXPR
6168           || stmt_code == POINTER_PLUS_EXPR)
6169         {
6170           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6171             op = gimple_assign_rhs2 (use->stmt);
6172           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6173             op = gimple_assign_rhs1 (use->stmt);
6174           else
6175             op = NULL_TREE;
6176         }
6177       else
6178         op = NULL_TREE;
6179
6180       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6181         return;
6182     }
6183
6184   comp = get_computation (data->current_loop, use, cand);
6185   gcc_assert (comp != NULL_TREE);
6186
6187   switch (gimple_code (use->stmt))
6188     {
6189     case GIMPLE_PHI:
6190       tgt = PHI_RESULT (use->stmt);
6191
6192       /* If we should keep the biv, do not replace it.  */
6193       if (name_info (data, tgt)->preserve_biv)
6194         return;
6195
6196       bsi = gsi_after_labels (gimple_bb (use->stmt));
6197       break;
6198
6199     case GIMPLE_ASSIGN:
6200       tgt = gimple_assign_lhs (use->stmt);
6201       bsi = gsi_for_stmt (use->stmt);
6202       break;
6203
6204     default:
6205       gcc_unreachable ();
6206     }
6207
6208   if (!valid_gimple_rhs_p (comp)
6209       || (gimple_code (use->stmt) != GIMPLE_PHI
6210           /* We can't allow re-allocating the stmt as it might be pointed
6211              to still.  */
6212           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6213               >= gimple_num_ops (gsi_stmt (bsi)))))
6214     {
6215       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6216                                        true, GSI_SAME_STMT);
6217       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6218         {
6219           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6220           /* As this isn't a plain copy we have to reset alignment
6221              information.  */
6222           if (SSA_NAME_PTR_INFO (comp))
6223             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6224         }
6225     }
6226
6227   if (gimple_code (use->stmt) == GIMPLE_PHI)
6228     {
6229       ass = gimple_build_assign (tgt, comp);
6230       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6231
6232       bsi = gsi_for_stmt (use->stmt);
6233       remove_phi_node (&bsi, false);
6234     }
6235   else
6236     {
6237       gimple_assign_set_rhs_from_tree (&bsi, comp);
6238       use->stmt = gsi_stmt (bsi);
6239     }
6240 }
6241
6242 /* Performs a peephole optimization to reorder the iv update statement with
6243    a mem ref to enable instruction combining in later phases. The mem ref uses
6244    the iv value before the update, so the reordering transformation requires
6245    adjustment of the offset. CAND is the selected IV_CAND.
6246
6247    Example:
6248
6249    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6250    iv2 = iv1 + 1;
6251
6252    if (t < val)      (1)
6253      goto L;
6254    goto Head;
6255
6256
6257    directly propagating t over to (1) will introduce overlapping live range
6258    thus increase register pressure. This peephole transform it into:
6259
6260
6261    iv2 = iv1 + 1;
6262    t = MEM_REF (base, iv2, 8, 8);
6263    if (t < val)
6264      goto L;
6265    goto Head;
6266 */
6267
6268 static void
6269 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6270 {
6271   tree var_after;
6272   gimple iv_update, stmt;
6273   basic_block bb;
6274   gimple_stmt_iterator gsi, gsi_iv;
6275
6276   if (cand->pos != IP_NORMAL)
6277     return;
6278
6279   var_after = cand->var_after;
6280   iv_update = SSA_NAME_DEF_STMT (var_after);
6281
6282   bb = gimple_bb (iv_update);
6283   gsi = gsi_last_nondebug_bb (bb);
6284   stmt = gsi_stmt (gsi);
6285
6286   /* Only handle conditional statement for now.  */
6287   if (gimple_code (stmt) != GIMPLE_COND)
6288     return;
6289
6290   gsi_prev_nondebug (&gsi);
6291   stmt = gsi_stmt (gsi);
6292   if (stmt != iv_update)
6293     return;
6294
6295   gsi_prev_nondebug (&gsi);
6296   if (gsi_end_p (gsi))
6297     return;
6298
6299   stmt = gsi_stmt (gsi);
6300   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6301     return;
6302
6303   if (stmt != use->stmt)
6304     return;
6305
6306   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6307     return;
6308
6309   if (dump_file && (dump_flags & TDF_DETAILS))
6310     {
6311       fprintf (dump_file, "Reordering \n");
6312       print_gimple_stmt (dump_file, iv_update, 0, 0);
6313       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6314       fprintf (dump_file, "\n");
6315     }
6316
6317   gsi = gsi_for_stmt (use->stmt);
6318   gsi_iv = gsi_for_stmt (iv_update);
6319   gsi_move_before (&gsi_iv, &gsi);
6320
6321   cand->pos = IP_BEFORE_USE;
6322   cand->incremented_at = use->stmt;
6323 }
6324
6325 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6326
6327 static void
6328 rewrite_use_address (struct ivopts_data *data,
6329                      struct iv_use *use, struct iv_cand *cand)
6330 {
6331   aff_tree aff;
6332   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6333   tree base_hint = NULL_TREE;
6334   tree ref, iv;
6335   bool ok;
6336
6337   adjust_iv_update_pos (cand, use);
6338   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6339   gcc_assert (ok);
6340   unshare_aff_combination (&aff);
6341
6342   /* To avoid undefined overflow problems, all IV candidates use unsigned
6343      integer types.  The drawback is that this makes it impossible for
6344      create_mem_ref to distinguish an IV that is based on a memory object
6345      from one that represents simply an offset.
6346
6347      To work around this problem, we pass a hint to create_mem_ref that
6348      indicates which variable (if any) in aff is an IV based on a memory
6349      object.  Note that we only consider the candidate.  If this is not
6350      based on an object, the base of the reference is in some subexpression
6351      of the use -- but these will use pointer types, so they are recognized
6352      by the create_mem_ref heuristics anyway.  */
6353   if (cand->iv->base_object)
6354     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6355
6356   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6357   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6358                         reference_alias_ptr_type (*use->op_p),
6359                         iv, base_hint, data->speed);
6360   copy_ref_info (ref, *use->op_p);
6361   *use->op_p = ref;
6362 }
6363
6364 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6365    candidate CAND.  */
6366
6367 static void
6368 rewrite_use_compare (struct ivopts_data *data,
6369                      struct iv_use *use, struct iv_cand *cand)
6370 {
6371   tree comp, *var_p, op, bound;
6372   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6373   enum tree_code compare;
6374   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6375   bool ok;
6376
6377   bound = cp->value;
6378   if (bound)
6379     {
6380       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6381       tree var_type = TREE_TYPE (var);
6382       gimple_seq stmts;
6383
6384       if (dump_file && (dump_flags & TDF_DETAILS))
6385         {
6386           fprintf (dump_file, "Replacing exit test: ");
6387           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6388         }
6389       compare = cp->comp;
6390       bound = unshare_expr (fold_convert (var_type, bound));
6391       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6392       if (stmts)
6393         gsi_insert_seq_on_edge_immediate (
6394                 loop_preheader_edge (data->current_loop),
6395                 stmts);
6396
6397       gimple_cond_set_lhs (use->stmt, var);
6398       gimple_cond_set_code (use->stmt, compare);
6399       gimple_cond_set_rhs (use->stmt, op);
6400       return;
6401     }
6402
6403   /* The induction variable elimination failed; just express the original
6404      giv.  */
6405   comp = get_computation (data->current_loop, use, cand);
6406   gcc_assert (comp != NULL_TREE);
6407
6408   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6409   gcc_assert (ok);
6410
6411   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6412                                      true, GSI_SAME_STMT);
6413 }
6414
6415 /* Rewrites USE using candidate CAND.  */
6416
6417 static void
6418 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6419 {
6420   switch (use->type)
6421     {
6422       case USE_NONLINEAR_EXPR:
6423         rewrite_use_nonlinear_expr (data, use, cand);
6424         break;
6425
6426       case USE_ADDRESS:
6427         rewrite_use_address (data, use, cand);
6428         break;
6429
6430       case USE_COMPARE:
6431         rewrite_use_compare (data, use, cand);
6432         break;
6433
6434       default:
6435         gcc_unreachable ();
6436     }
6437
6438   update_stmt (use->stmt);
6439 }
6440
6441 /* Rewrite the uses using the selected induction variables.  */
6442
6443 static void
6444 rewrite_uses (struct ivopts_data *data)
6445 {
6446   unsigned i;
6447   struct iv_cand *cand;
6448   struct iv_use *use;
6449
6450   for (i = 0; i < n_iv_uses (data); i++)
6451     {
6452       use = iv_use (data, i);
6453       cand = use->selected;
6454       gcc_assert (cand);
6455
6456       rewrite_use (data, use, cand);
6457     }
6458 }
6459
6460 /* Removes the ivs that are not used after rewriting.  */
6461
6462 static void
6463 remove_unused_ivs (struct ivopts_data *data)
6464 {
6465   unsigned j;
6466   bitmap_iterator bi;
6467   bitmap toremove = BITMAP_ALLOC (NULL);
6468
6469   /* Figure out an order in which to release SSA DEFs so that we don't
6470      release something that we'd have to propagate into a debug stmt
6471      afterwards.  */
6472   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6473     {
6474       struct version_info *info;
6475
6476       info = ver_info (data, j);
6477       if (info->iv
6478           && !integer_zerop (info->iv->step)
6479           && !info->inv_id
6480           && !info->iv->have_use_for
6481           && !info->preserve_biv)
6482         {
6483           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6484
6485           tree def = info->iv->ssa_name;
6486
6487           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6488             {
6489               imm_use_iterator imm_iter;
6490               use_operand_p use_p;
6491               gimple stmt;
6492               int count = 0;
6493
6494               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6495                 {
6496                   if (!gimple_debug_bind_p (stmt))
6497                     continue;
6498
6499                   /* We just want to determine whether to do nothing
6500                      (count == 0), to substitute the computed
6501                      expression into a single use of the SSA DEF by
6502                      itself (count == 1), or to use a debug temp
6503                      because the SSA DEF is used multiple times or as
6504                      part of a larger expression (count > 1). */
6505                   count++;
6506                   if (gimple_debug_bind_get_value (stmt) != def)
6507                     count++;
6508
6509                   if (count > 1)
6510                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6511                 }
6512
6513               if (!count)
6514                 continue;
6515
6516               struct iv_use dummy_use;
6517               struct iv_cand *best_cand = NULL, *cand;
6518               unsigned i, best_pref = 0, cand_pref;
6519
6520               memset (&dummy_use, 0, sizeof (dummy_use));
6521               dummy_use.iv = info->iv;
6522               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6523                 {
6524                   cand = iv_use (data, i)->selected;
6525                   if (cand == best_cand)
6526                     continue;
6527                   cand_pref = operand_equal_p (cand->iv->step,
6528                                                info->iv->step, 0)
6529                     ? 4 : 0;
6530                   cand_pref
6531                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6532                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6533                     ? 2 : 0;
6534                   cand_pref
6535                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6536                     ? 1 : 0;
6537                   if (best_cand == NULL || best_pref < cand_pref)
6538                     {
6539                       best_cand = cand;
6540                       best_pref = cand_pref;
6541                     }
6542                 }
6543
6544               if (!best_cand)
6545                 continue;
6546
6547               tree comp = get_computation_at (data->current_loop,
6548                                               &dummy_use, best_cand,
6549                                               SSA_NAME_DEF_STMT (def));
6550               if (!comp)
6551                 continue;
6552
6553               if (count > 1)
6554                 {
6555                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6556                   DECL_ARTIFICIAL (vexpr) = 1;
6557                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6558                   if (SSA_NAME_VAR (def))
6559                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6560                   else
6561                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6562                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6563                   gimple_stmt_iterator gsi;
6564
6565                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6566                     gsi = gsi_after_labels (gimple_bb
6567                                             (SSA_NAME_DEF_STMT (def)));
6568                   else
6569                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6570
6571                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6572                   comp = vexpr;
6573                 }
6574
6575               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6576                 {
6577                   if (!gimple_debug_bind_p (stmt))
6578                     continue;
6579
6580                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6581                     SET_USE (use_p, comp);
6582
6583                   update_stmt (stmt);
6584                 }
6585             }
6586         }
6587     }
6588
6589   release_defs_bitset (toremove);
6590
6591   BITMAP_FREE (toremove);
6592 }
6593
6594 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6595    for pointer_map_traverse.  */
6596
6597 static bool
6598 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6599                       void *data ATTRIBUTE_UNUSED)
6600 {
6601   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6602
6603   free (niter);
6604   return true;
6605 }
6606
6607 /* Frees data allocated by the optimization of a single loop.  */
6608
6609 static void
6610 free_loop_data (struct ivopts_data *data)
6611 {
6612   unsigned i, j;
6613   bitmap_iterator bi;
6614   tree obj;
6615
6616   if (data->niters)
6617     {
6618       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6619       pointer_map_destroy (data->niters);
6620       data->niters = NULL;
6621     }
6622
6623   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6624     {
6625       struct version_info *info;
6626
6627       info = ver_info (data, i);
6628       free (info->iv);
6629       info->iv = NULL;
6630       info->has_nonlin_use = false;
6631       info->preserve_biv = false;
6632       info->inv_id = 0;
6633     }
6634   bitmap_clear (data->relevant);
6635   bitmap_clear (data->important_candidates);
6636
6637   for (i = 0; i < n_iv_uses (data); i++)
6638     {
6639       struct iv_use *use = iv_use (data, i);
6640
6641       free (use->iv);
6642       BITMAP_FREE (use->related_cands);
6643       for (j = 0; j < use->n_map_members; j++)
6644         if (use->cost_map[j].depends_on)
6645           BITMAP_FREE (use->cost_map[j].depends_on);
6646       free (use->cost_map);
6647       free (use);
6648     }
6649   data->iv_uses.truncate (0);
6650
6651   for (i = 0; i < n_iv_cands (data); i++)
6652     {
6653       struct iv_cand *cand = iv_cand (data, i);
6654
6655       free (cand->iv);
6656       if (cand->depends_on)
6657         BITMAP_FREE (cand->depends_on);
6658       free (cand);
6659     }
6660   data->iv_candidates.truncate (0);
6661
6662   if (data->version_info_size < num_ssa_names)
6663     {
6664       data->version_info_size = 2 * num_ssa_names;
6665       free (data->version_info);
6666       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6667     }
6668
6669   data->max_inv_id = 0;
6670
6671   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6672     SET_DECL_RTL (obj, NULL_RTX);
6673
6674   decl_rtl_to_reset.truncate (0);
6675
6676   data->inv_expr_tab.empty ();
6677   data->inv_expr_id = 0;
6678 }
6679
6680 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6681    loop tree.  */
6682
6683 static void
6684 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6685 {
6686   free_loop_data (data);
6687   free (data->version_info);
6688   BITMAP_FREE (data->relevant);
6689   BITMAP_FREE (data->important_candidates);
6690
6691   decl_rtl_to_reset.release ();
6692   data->iv_uses.release ();
6693   data->iv_candidates.release ();
6694   data->inv_expr_tab.dispose ();
6695 }
6696
6697 /* Returns true if the loop body BODY includes any function calls.  */
6698
6699 static bool
6700 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6701 {
6702   gimple_stmt_iterator gsi;
6703   unsigned i;
6704
6705   for (i = 0; i < num_nodes; i++)
6706     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6707       {
6708         gimple stmt = gsi_stmt (gsi);
6709         if (is_gimple_call (stmt)
6710             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6711           return true;
6712       }
6713   return false;
6714 }
6715
6716 /* Optimizes the LOOP.  Returns true if anything changed.  */
6717
6718 static bool
6719 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6720 {
6721   bool changed = false;
6722   struct iv_ca *iv_ca;
6723   edge exit = single_dom_exit (loop);
6724   basic_block *body;
6725
6726   gcc_assert (!data->niters);
6727   data->current_loop = loop;
6728   data->speed = optimize_loop_for_speed_p (loop);
6729
6730   if (dump_file && (dump_flags & TDF_DETAILS))
6731     {
6732       fprintf (dump_file, "Processing loop %d\n", loop->num);
6733
6734       if (exit)
6735         {
6736           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6737                    exit->src->index, exit->dest->index);
6738           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6739           fprintf (dump_file, "\n");
6740         }
6741
6742       fprintf (dump_file, "\n");
6743     }
6744
6745   body = get_loop_body (loop);
6746   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6747   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6748   free (body);
6749
6750   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6751
6752   /* For each ssa name determines whether it behaves as an induction variable
6753      in some loop.  */
6754   if (!find_induction_variables (data))
6755     goto finish;
6756
6757   /* Finds interesting uses (item 1).  */
6758   find_interesting_uses (data);
6759   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6760     goto finish;
6761
6762   /* Finds candidates for the induction variables (item 2).  */
6763   find_iv_candidates (data);
6764
6765   /* Calculates the costs (item 3, part 1).  */
6766   determine_iv_costs (data);
6767   determine_use_iv_costs (data);
6768   determine_set_costs (data);
6769
6770   /* Find the optimal set of induction variables (item 3, part 2).  */
6771   iv_ca = find_optimal_iv_set (data);
6772   if (!iv_ca)
6773     goto finish;
6774   changed = true;
6775
6776   /* Create the new induction variables (item 4, part 1).  */
6777   create_new_ivs (data, iv_ca);
6778   iv_ca_free (&iv_ca);
6779
6780   /* Rewrite the uses (item 4, part 2).  */
6781   rewrite_uses (data);
6782
6783   /* Remove the ivs that are unused after rewriting.  */
6784   remove_unused_ivs (data);
6785
6786   /* We have changed the structure of induction variables; it might happen
6787      that definitions in the scev database refer to some of them that were
6788      eliminated.  */
6789   scev_reset ();
6790
6791 finish:
6792   free_loop_data (data);
6793
6794   return changed;
6795 }
6796
6797 /* Main entry point.  Optimizes induction variables in loops.  */
6798
6799 void
6800 tree_ssa_iv_optimize (void)
6801 {
6802   struct loop *loop;
6803   struct ivopts_data data;
6804   loop_iterator li;
6805
6806   tree_ssa_iv_optimize_init (&data);
6807
6808   /* Optimize the loops starting with the innermost ones.  */
6809   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6810     {
6811       if (dump_file && (dump_flags & TDF_DETAILS))
6812         flow_loop_dump (loop, dump_file, NULL, 1);
6813
6814       tree_ssa_iv_optimize_loop (&data, loop);
6815     }
6816
6817   tree_ssa_iv_optimize_finalize (&data);
6818 }