gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "tm_p.h"
  70 #include "basic-block.h"
  71 #include "gimple-pretty-print.h"
  72 #include "gimplify.h"
  73 #include "gimple-ssa.h"
  74 #include "cgraph.h"
  75 #include "tree-cfg.h"
  76 #include "tree-phinodes.h"
  77 #include "ssa-iterators.h"
  78 #include "tree-ssanames.h"
  79 #include "tree-ssa-loop-ivopts.h"
  80 #include "tree-ssa-loop-manip.h"
  81 #include "tree-ssa-loop-niter.h"
  82 #include "tree-ssa-loop.h"
  83 #include "tree-dfa.h"
  84 #include "tree-ssa.h"
  85 #include "cfgloop.h"
  86 #include "tree-pass.h"
  87 #include "ggc.h"
  88 #include "insn-config.h"
  89 #include "pointer-set.h"
  90 #include "hash-table.h"
  91 #include "tree-chrec.h"
  92 #include "tree-scalar-evolution.h"
  93 #include "cfgloop.h"
  94 #include "params.h"
  95 #include "langhooks.h"
  96 #include "tree-affine.h"
  97 #include "target.h"
  98 #include "tree-inline.h"
  99 #include "tree-ssa-propagate.h"
 100 #include "expmed.h"
 101 #include "tree-ssa-address.h"
 102
 103 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 104    cost of different addressing modes.  This should be moved to a TBD
 105    interface between the GIMPLE and RTL worlds.  */
 106 #include "expr.h"
 107 #include "recog.h"
 108
 109 /* The infinite cost.  */
 110 #define INFTY 10000000
 111
 112 #define AVG_LOOP_NITER(LOOP) 5
 113
 114 /* Returns the expected number of loop iterations for LOOP.
 115    The average trip count is computed from profile data if it
 116    exists. */
 117
 118 static inline HOST_WIDE_INT
 119 avg_loop_niter (struct loop *loop)
 120 {
 121   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 122   if (niter == -1)
 123     return AVG_LOOP_NITER (loop);
 124
 125   return niter;
 126 }
 127
 128 /* Representation of the induction variable.  */
 129 struct iv
 130 {
 131   tree base;            /* Initial value of the iv.  */
 132   tree base_object;     /* A memory object to that the induction variable points.  */
 133   tree step;            /* Step of the iv (constant only).  */
 134   tree ssa_name;        /* The ssa name with the value.  */
 135   bool biv_p;           /* Is it a biv?  */
 136   bool have_use_for;    /* Do we already have a use for it?  */
 137   unsigned use_id;      /* The identifier in the use if it is the case.  */
 138 };
 139
 140 /* Per-ssa version information (induction variable descriptions, etc.).  */
 141 struct version_info
 142 {
 143   tree name;            /* The ssa name.  */
 144   struct iv *iv;        /* Induction variable description.  */
 145   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 146                            an expression that is not an induction variable.  */
 147   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 148   unsigned inv_id;      /* Id of an invariant.  */
 149 };
 150
 151 /* Types of uses.  */
 152 enum use_type
 153 {
 154   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 155   USE_ADDRESS,          /* Use in an address.  */
 156   USE_COMPARE           /* Use is a compare.  */
 157 };
 158
 159 /* Cost of a computation.  */
 160 typedef struct
 161 {
 162   int cost;             /* The runtime cost.  */
 163   unsigned complexity;  /* The estimate of the complexity of the code for
 164                            the computation (in no concrete units --
 165                            complexity field should be larger for more
 166                            complex expressions and addressing modes).  */
 167 } comp_cost;
 168
 169 static const comp_cost no_cost = {0, 0};
 170 static const comp_cost infinite_cost = {INFTY, INFTY};
 171
 172 /* The candidate - cost pair.  */
 173 struct cost_pair
 174 {
 175   struct iv_cand *cand; /* The candidate.  */
 176   comp_cost cost;       /* The cost.  */
 177   bitmap depends_on;    /* The list of invariants that have to be
 178                            preserved.  */
 179   tree value;           /* For final value elimination, the expression for
 180                            the final value of the iv.  For iv elimination,
 181                            the new bound to compare with.  */
 182   enum tree_code comp;  /* For iv elimination, the comparison.  */
 183   int inv_expr_id;      /* Loop invariant expression id.  */
 184 };
 185
 186 /* Use.  */
 187 struct iv_use
 188 {
 189   unsigned id;          /* The id of the use.  */
 190   enum use_type type;   /* Type of the use.  */
 191   struct iv *iv;        /* The induction variable it is based on.  */
 192   gimple stmt;          /* Statement in that it occurs.  */
 193   tree *op_p;           /* The place where it occurs.  */
 194   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 195                            important ones.  */
 196
 197   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 198   struct cost_pair *cost_map;
 199                         /* The costs wrto the iv candidates.  */
 200
 201   struct iv_cand *selected;
 202                         /* The selected candidate.  */
 203 };
 204
 205 /* The position where the iv is computed.  */
 206 enum iv_position
 207 {
 208   IP_NORMAL,            /* At the end, just before the exit condition.  */
 209   IP_END,               /* At the end of the latch block.  */
 210   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 211   IP_AFTER_USE,         /* Immediately after a specific use.  */
 212   IP_ORIGINAL           /* The original biv.  */
 213 };
 214
 215 /* The induction variable candidate.  */
 216 struct iv_cand
 217 {
 218   unsigned id;          /* The number of the candidate.  */
 219   bool important;       /* Whether this is an "important" candidate, i.e. such
 220                            that it should be considered by all uses.  */
 221   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 222   gimple incremented_at;/* For original biv, the statement where it is
 223                            incremented.  */
 224   tree var_before;      /* The variable used for it before increment.  */
 225   tree var_after;       /* The variable used for it after increment.  */
 226   struct iv *iv;        /* The value of the candidate.  NULL for
 227                            "pseudocandidate" used to indicate the possibility
 228                            to replace the final value of an iv by direct
 229                            computation of the value.  */
 230   unsigned cost;        /* Cost of the candidate.  */
 231   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 232   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 233                               where it is incremented.  */
 234   bitmap depends_on;    /* The list of invariants that are used in step of the
 235                            biv.  */
 236 };
 237
 238 /* Loop invariant expression hashtable entry.  */
 239 struct iv_inv_expr_ent
 240 {
 241   tree expr;
 242   int id;
 243   hashval_t hash;
 244 };
 245
 246 /* The data used by the induction variable optimizations.  */
 247
 248 typedef struct iv_use *iv_use_p;
 249
 250 typedef struct iv_cand *iv_cand_p;
 251
 252 /* Hashtable helpers.  */
 253
 254 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 255 {
 256   typedef iv_inv_expr_ent value_type;
 257   typedef iv_inv_expr_ent compare_type;
 258   static inline hashval_t hash (const value_type *);
 259   static inline bool equal (const value_type *, const compare_type *);
 260 };
 261
 262 /* Hash function for loop invariant expressions.  */
 263
 264 inline hashval_t
 265 iv_inv_expr_hasher::hash (const value_type *expr)
 266 {
 267   return expr->hash;
 268 }
 269
 270 /* Hash table equality function for expressions.  */
 271
 272 inline bool
 273 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 274 {
 275   return expr1->hash == expr2->hash
 276          && operand_equal_p (expr1->expr, expr2->expr, 0);
 277 }
 278
 279 struct ivopts_data
 280 {
 281   /* The currently optimized loop.  */
 282   struct loop *current_loop;
 283
 284   /* Numbers of iterations for all exits of the current loop.  */
 285   struct pointer_map_t *niters;
 286
 287   /* Number of registers used in it.  */
 288   unsigned regs_used;
 289
 290   /* The size of version_info array allocated.  */
 291   unsigned version_info_size;
 292
 293   /* The array of information for the ssa names.  */
 294   struct version_info *version_info;
 295
 296   /* The hashtable of loop invariant expressions created
 297      by ivopt.  */
 298   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 299
 300   /* Loop invariant expression id.  */
 301   int inv_expr_id;
 302
 303   /* The bitmap of indices in version_info whose value was changed.  */
 304   bitmap relevant;
 305
 306   /* The uses of induction variables.  */
 307   vec<iv_use_p> iv_uses;
 308
 309   /* The candidates.  */
 310   vec<iv_cand_p> iv_candidates;
 311
 312   /* A bitmap of important candidates.  */
 313   bitmap important_candidates;
 314
 315   /* The maximum invariant id.  */
 316   unsigned max_inv_id;
 317
 318   /* Whether to consider just related and important candidates when replacing a
 319      use.  */
 320   bool consider_all_candidates;
 321
 322   /* Are we optimizing for speed?  */
 323   bool speed;
 324
 325   /* Whether the loop body includes any function calls.  */
 326   bool body_includes_call;
 327
 328   /* Whether the loop body can only be exited via single exit.  */
 329   bool loop_single_exit_p;
 330 };
 331
 332 /* An assignment of iv candidates to uses.  */
 333
 334 struct iv_ca
 335 {
 336   /* The number of uses covered by the assignment.  */
 337   unsigned upto;
 338
 339   /* Number of uses that cannot be expressed by the candidates in the set.  */
 340   unsigned bad_uses;
 341
 342   /* Candidate assigned to a use, together with the related costs.  */
 343   struct cost_pair **cand_for_use;
 344
 345   /* Number of times each candidate is used.  */
 346   unsigned *n_cand_uses;
 347
 348   /* The candidates used.  */
 349   bitmap cands;
 350
 351   /* The number of candidates in the set.  */
 352   unsigned n_cands;
 353
 354   /* Total number of registers needed.  */
 355   unsigned n_regs;
 356
 357   /* Total cost of expressing uses.  */
 358   comp_cost cand_use_cost;
 359
 360   /* Total cost of candidates.  */
 361   unsigned cand_cost;
 362
 363   /* Number of times each invariant is used.  */
 364   unsigned *n_invariant_uses;
 365
 366   /* The array holding the number of uses of each loop
 367      invariant expressions created by ivopt.  */
 368   unsigned *used_inv_expr;
 369
 370   /* The number of created loop invariants.  */
 371   unsigned num_used_inv_expr;
 372
 373   /* Total cost of the assignment.  */
 374   comp_cost cost;
 375 };
 376
 377 /* Difference of two iv candidate assignments.  */
 378
 379 struct iv_ca_delta
 380 {
 381   /* Changed use.  */
 382   struct iv_use *use;
 383
 384   /* An old assignment (for rollback purposes).  */
 385   struct cost_pair *old_cp;
 386
 387   /* A new assignment.  */
 388   struct cost_pair *new_cp;
 389
 390   /* Next change in the list.  */
 391   struct iv_ca_delta *next_change;
 392 };
 393
 394 /* Bound on number of candidates below that all candidates are considered.  */
 395
 396 #define CONSIDER_ALL_CANDIDATES_BOUND \
 397   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 398
 399 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 400    optimizing such a loop would help, and it would take ages).  */
 401
 402 #define MAX_CONSIDERED_USES \
 403   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 404
 405 /* If there are at most this number of ivs in the set, try removing unnecessary
 406    ivs from the set always.  */
 407
 408 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 409   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 410
 411 /* The list of trees for that the decl_rtl field must be reset is stored
 412    here.  */
 413
 414 static vec<tree> decl_rtl_to_reset;
 415
 416 static comp_cost force_expr_to_var_cost (tree, bool);
 417
 418 /* Number of uses recorded in DATA.  */
 419
 420 static inline unsigned
 421 n_iv_uses (struct ivopts_data *data)
 422 {
 423   return data->iv_uses.length ();
 424 }
 425
 426 /* Ith use recorded in DATA.  */
 427
 428 static inline struct iv_use *
 429 iv_use (struct ivopts_data *data, unsigned i)
 430 {
 431   return data->iv_uses[i];
 432 }
 433
 434 /* Number of candidates recorded in DATA.  */
 435
 436 static inline unsigned
 437 n_iv_cands (struct ivopts_data *data)
 438 {
 439   return data->iv_candidates.length ();
 440 }
 441
 442 /* Ith candidate recorded in DATA.  */
 443
 444 static inline struct iv_cand *
 445 iv_cand (struct ivopts_data *data, unsigned i)
 446 {
 447   return data->iv_candidates[i];
 448 }
 449
 450 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 451
 452 edge
 453 single_dom_exit (struct loop *loop)
 454 {
 455   edge exit = single_exit (loop);
 456
 457   if (!exit)
 458     return NULL;
 459
 460   if (!just_once_each_iteration_p (loop, exit->src))
 461     return NULL;
 462
 463   return exit;
 464 }
 465
 466 /* Dumps information about the induction variable IV to FILE.  */
 467
 468 void
 469 dump_iv (FILE *file, struct iv *iv)
 470 {
 471   if (iv->ssa_name)
 472     {
 473       fprintf (file, "ssa name ");
 474       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 475       fprintf (file, "\n");
 476     }
 477
 478   fprintf (file, "  type ");
 479   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 480   fprintf (file, "\n");
 481
 482   if (iv->step)
 483     {
 484       fprintf (file, "  base ");
 485       print_generic_expr (file, iv->base, TDF_SLIM);
 486       fprintf (file, "\n");
 487
 488       fprintf (file, "  step ");
 489       print_generic_expr (file, iv->step, TDF_SLIM);
 490       fprintf (file, "\n");
 491     }
 492   else
 493     {
 494       fprintf (file, "  invariant ");
 495       print_generic_expr (file, iv->base, TDF_SLIM);
 496       fprintf (file, "\n");
 497     }
 498
 499   if (iv->base_object)
 500     {
 501       fprintf (file, "  base object ");
 502       print_generic_expr (file, iv->base_object, TDF_SLIM);
 503       fprintf (file, "\n");
 504     }
 505
 506   if (iv->biv_p)
 507     fprintf (file, "  is a biv\n");
 508 }
 509
 510 /* Dumps information about the USE to FILE.  */
 511
 512 void
 513 dump_use (FILE *file, struct iv_use *use)
 514 {
 515   fprintf (file, "use %d\n", use->id);
 516
 517   switch (use->type)
 518     {
 519     case USE_NONLINEAR_EXPR:
 520       fprintf (file, "  generic\n");
 521       break;
 522
 523     case USE_ADDRESS:
 524       fprintf (file, "  address\n");
 525       break;
 526
 527     case USE_COMPARE:
 528       fprintf (file, "  compare\n");
 529       break;
 530
 531     default:
 532       gcc_unreachable ();
 533     }
 534
 535   fprintf (file, "  in statement ");
 536   print_gimple_stmt (file, use->stmt, 0, 0);
 537   fprintf (file, "\n");
 538
 539   fprintf (file, "  at position ");
 540   if (use->op_p)
 541     print_generic_expr (file, *use->op_p, TDF_SLIM);
 542   fprintf (file, "\n");
 543
 544   dump_iv (file, use->iv);
 545
 546   if (use->related_cands)
 547     {
 548       fprintf (file, "  related candidates ");
 549       dump_bitmap (file, use->related_cands);
 550     }
 551 }
 552
 553 /* Dumps information about the uses to FILE.  */
 554
 555 void
 556 dump_uses (FILE *file, struct ivopts_data *data)
 557 {
 558   unsigned i;
 559   struct iv_use *use;
 560
 561   for (i = 0; i < n_iv_uses (data); i++)
 562     {
 563       use = iv_use (data, i);
 564
 565       dump_use (file, use);
 566       fprintf (file, "\n");
 567     }
 568 }
 569
 570 /* Dumps information about induction variable candidate CAND to FILE.  */
 571
 572 void
 573 dump_cand (FILE *file, struct iv_cand *cand)
 574 {
 575   struct iv *iv = cand->iv;
 576
 577   fprintf (file, "candidate %d%s\n",
 578            cand->id, cand->important ? " (important)" : "");
 579
 580   if (cand->depends_on)
 581     {
 582       fprintf (file, "  depends on ");
 583       dump_bitmap (file, cand->depends_on);
 584     }
 585
 586   if (!iv)
 587     {
 588       fprintf (file, "  final value replacement\n");
 589       return;
 590     }
 591
 592   if (cand->var_before)
 593     {
 594       fprintf (file, "  var_before ");
 595       print_generic_expr (file, cand->var_before, TDF_SLIM);
 596       fprintf (file, "\n");
 597     }
 598   if (cand->var_after)
 599     {
 600       fprintf (file, "  var_after ");
 601       print_generic_expr (file, cand->var_after, TDF_SLIM);
 602       fprintf (file, "\n");
 603     }
 604
 605   switch (cand->pos)
 606     {
 607     case IP_NORMAL:
 608       fprintf (file, "  incremented before exit test\n");
 609       break;
 610
 611     case IP_BEFORE_USE:
 612       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 613       break;
 614
 615     case IP_AFTER_USE:
 616       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 617       break;
 618
 619     case IP_END:
 620       fprintf (file, "  incremented at end\n");
 621       break;
 622
 623     case IP_ORIGINAL:
 624       fprintf (file, "  original biv\n");
 625       break;
 626     }
 627
 628   dump_iv (file, iv);
 629 }
 630
 631 /* Returns the info for ssa version VER.  */
 632
 633 static inline struct version_info *
 634 ver_info (struct ivopts_data *data, unsigned ver)
 635 {
 636   return data->version_info + ver;
 637 }
 638
 639 /* Returns the info for ssa name NAME.  */
 640
 641 static inline struct version_info *
 642 name_info (struct ivopts_data *data, tree name)
 643 {
 644   return ver_info (data, SSA_NAME_VERSION (name));
 645 }
 646
 647 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 648    emitted in LOOP.  */
 649
 650 static bool
 651 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 652 {
 653   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 654
 655   gcc_assert (bb);
 656
 657   if (sbb == loop->latch)
 658     return true;
 659
 660   if (sbb != bb)
 661     return false;
 662
 663   return stmt == last_stmt (bb);
 664 }
 665
 666 /* Returns true if STMT if after the place where the original induction
 667    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 668    if the positions are identical.  */
 669
 670 static bool
 671 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 672 {
 673   basic_block cand_bb = gimple_bb (cand->incremented_at);
 674   basic_block stmt_bb = gimple_bb (stmt);
 675
 676   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 677     return false;
 678
 679   if (stmt_bb != cand_bb)
 680     return true;
 681
 682   if (true_if_equal
 683       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 684     return true;
 685   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 686 }
 687
 688 /* Returns true if STMT if after the place where the induction variable
 689    CAND is incremented in LOOP.  */
 690
 691 static bool
 692 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 693 {
 694   switch (cand->pos)
 695     {
 696     case IP_END:
 697       return false;
 698
 699     case IP_NORMAL:
 700       return stmt_after_ip_normal_pos (loop, stmt);
 701
 702     case IP_ORIGINAL:
 703     case IP_AFTER_USE:
 704       return stmt_after_inc_pos (cand, stmt, false);
 705
 706     case IP_BEFORE_USE:
 707       return stmt_after_inc_pos (cand, stmt, true);
 708
 709     default:
 710       gcc_unreachable ();
 711     }
 712 }
 713
 714 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 715
 716 static bool
 717 abnormal_ssa_name_p (tree exp)
 718 {
 719   if (!exp)
 720     return false;
 721
 722   if (TREE_CODE (exp) != SSA_NAME)
 723     return false;
 724
 725   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 726 }
 727
 728 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 729    abnormal phi node.  Callback for for_each_index.  */
 730
 731 static bool
 732 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 733                                   void *data ATTRIBUTE_UNUSED)
 734 {
 735   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 736     {
 737       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 738         return false;
 739       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 740         return false;
 741     }
 742
 743   return !abnormal_ssa_name_p (*index);
 744 }
 745
 746 /* Returns true if EXPR contains a ssa name that occurs in an
 747    abnormal phi node.  */
 748
 749 bool
 750 contains_abnormal_ssa_name_p (tree expr)
 751 {
 752   enum tree_code code;
 753   enum tree_code_class codeclass;
 754
 755   if (!expr)
 756     return false;
 757
 758   code = TREE_CODE (expr);
 759   codeclass = TREE_CODE_CLASS (code);
 760
 761   if (code == SSA_NAME)
 762     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 763
 764   if (code == INTEGER_CST
 765       || is_gimple_min_invariant (expr))
 766     return false;
 767
 768   if (code == ADDR_EXPR)
 769     return !for_each_index (&TREE_OPERAND (expr, 0),
 770                             idx_contains_abnormal_ssa_name_p,
 771                             NULL);
 772
 773   if (code == COND_EXPR)
 774     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 775       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 776       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 777
 778   switch (codeclass)
 779     {
 780     case tcc_binary:
 781     case tcc_comparison:
 782       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 783         return true;
 784
 785       /* Fallthru.  */
 786     case tcc_unary:
 787       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 788         return true;
 789
 790       break;
 791
 792     default:
 793       gcc_unreachable ();
 794     }
 795
 796   return false;
 797 }
 798
 799 /*  Returns the structure describing number of iterations determined from
 800     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 801
 802 static struct tree_niter_desc *
 803 niter_for_exit (struct ivopts_data *data, edge exit)
 804 {
 805   struct tree_niter_desc *desc;
 806   void **slot;
 807
 808   if (!data->niters)
 809     {
 810       data->niters = pointer_map_create ();
 811       slot = NULL;
 812     }
 813   else
 814     slot = pointer_map_contains (data->niters, exit);
 815
 816   if (!slot)
 817     {
 818       /* Try to determine number of iterations.  We cannot safely work with ssa
 819          names that appear in phi nodes on abnormal edges, so that we do not
 820          create overlapping life ranges for them (PR 27283).  */
 821       desc = XNEW (struct tree_niter_desc);
 822       if (!number_of_iterations_exit (data->current_loop,
 823                                       exit, desc, true)
 824           || contains_abnormal_ssa_name_p (desc->niter))
 825         {
 826           XDELETE (desc);
 827           desc = NULL;
 828         }
 829       slot = pointer_map_insert (data->niters, exit);
 830       *slot = desc;
 831     }
 832   else
 833     desc = (struct tree_niter_desc *) *slot;
 834
 835   return desc;
 836 }
 837
 838 /* Returns the structure describing number of iterations determined from
 839    single dominating exit of DATA->current_loop, or NULL if something
 840    goes wrong.  */
 841
 842 static struct tree_niter_desc *
 843 niter_for_single_dom_exit (struct ivopts_data *data)
 844 {
 845   edge exit = single_dom_exit (data->current_loop);
 846
 847   if (!exit)
 848     return NULL;
 849
 850   return niter_for_exit (data, exit);
 851 }
 852
 853 /* Initializes data structures used by the iv optimization pass, stored
 854    in DATA.  */
 855
 856 static void
 857 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 858 {
 859   data->version_info_size = 2 * num_ssa_names;
 860   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 861   data->relevant = BITMAP_ALLOC (NULL);
 862   data->important_candidates = BITMAP_ALLOC (NULL);
 863   data->max_inv_id = 0;
 864   data->niters = NULL;
 865   data->iv_uses.create (20);
 866   data->iv_candidates.create (20);
 867   data->inv_expr_tab.create (10);
 868   data->inv_expr_id = 0;
 869   decl_rtl_to_reset.create (20);
 870 }
 871
 872 /* Returns a memory object to that EXPR points.  In case we are able to
 873    determine that it does not point to any such object, NULL is returned.  */
 874
 875 static tree
 876 determine_base_object (tree expr)
 877 {
 878   enum tree_code code = TREE_CODE (expr);
 879   tree base, obj;
 880
 881   /* If this is a pointer casted to any type, we need to determine
 882      the base object for the pointer; so handle conversions before
 883      throwing away non-pointer expressions.  */
 884   if (CONVERT_EXPR_P (expr))
 885     return determine_base_object (TREE_OPERAND (expr, 0));
 886
 887   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 888     return NULL_TREE;
 889
 890   switch (code)
 891     {
 892     case INTEGER_CST:
 893       return NULL_TREE;
 894
 895     case ADDR_EXPR:
 896       obj = TREE_OPERAND (expr, 0);
 897       base = get_base_address (obj);
 898
 899       if (!base)
 900         return expr;
 901
 902       if (TREE_CODE (base) == MEM_REF)
 903         return determine_base_object (TREE_OPERAND (base, 0));
 904
 905       return fold_convert (ptr_type_node,
 906                            build_fold_addr_expr (base));
 907
 908     case POINTER_PLUS_EXPR:
 909       return determine_base_object (TREE_OPERAND (expr, 0));
 910
 911     case PLUS_EXPR:
 912     case MINUS_EXPR:
 913       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 914       gcc_unreachable ();
 915
 916     default:
 917       return fold_convert (ptr_type_node, expr);
 918     }
 919 }
 920
 921 /* Allocates an induction variable with given initial value BASE and step STEP
 922    for loop LOOP.  */
 923
 924 static struct iv *
 925 alloc_iv (tree base, tree step)
 926 {
 927   tree base_object = base;
 928   struct iv *iv = XCNEW (struct iv);
 929   gcc_assert (step != NULL_TREE);
 930
 931   /* Lower all address expressions except ones with DECL_P as operand.
 932      By doing this:
 933        1) More accurate cost can be computed for address expressions;
 934        2) Duplicate candidates won't be created for bases in different
 935           forms, like &a[0] and &a.  */
 936   STRIP_NOPS (base_object);
 937   if (TREE_CODE (base_object) == ADDR_EXPR
 938       && !DECL_P (TREE_OPERAND (base_object, 0)))
 939     {
 940       aff_tree comb;
 941       double_int size;
 942       base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
 943                                              &comb, &size);
 944       gcc_assert (base_object != NULL_TREE);
 945       base_object = build_fold_addr_expr (base_object);
 946       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 947     }
 948
 949   iv->base = base;
 950   iv->base_object = determine_base_object (base_object);
 951   iv->step = step;
 952   iv->biv_p = false;
 953   iv->have_use_for = false;
 954   iv->use_id = 0;
 955   iv->ssa_name = NULL_TREE;
 956
 957   return iv;
 958 }
 959
 960 /* Sets STEP and BASE for induction variable IV.  */
 961
 962 static void
 963 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 964 {
 965   struct version_info *info = name_info (data, iv);
 966
 967   gcc_assert (!info->iv);
 968
 969   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 970   info->iv = alloc_iv (base, step);
 971   info->iv->ssa_name = iv;
 972 }
 973
 974 /* Finds induction variable declaration for VAR.  */
 975
 976 static struct iv *
 977 get_iv (struct ivopts_data *data, tree var)
 978 {
 979   basic_block bb;
 980   tree type = TREE_TYPE (var);
 981
 982   if (!POINTER_TYPE_P (type)
 983       && !INTEGRAL_TYPE_P (type))
 984     return NULL;
 985
 986   if (!name_info (data, var)->iv)
 987     {
 988       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 989
 990       if (!bb
 991           || !flow_bb_inside_loop_p (data->current_loop, bb))
 992         set_iv (data, var, var, build_int_cst (type, 0));
 993     }
 994
 995   return name_info (data, var)->iv;
 996 }
 997
 998 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 999    not define a simple affine biv with nonzero step.  */
1000
1001 static tree
1002 determine_biv_step (gimple phi)
1003 {
1004   struct loop *loop = gimple_bb (phi)->loop_father;
1005   tree name = PHI_RESULT (phi);
1006   affine_iv iv;
1007
1008   if (virtual_operand_p (name))
1009     return NULL_TREE;
1010
1011   if (!simple_iv (loop, loop, name, &iv, true))
1012     return NULL_TREE;
1013
1014   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1015 }
1016
1017 /* Finds basic ivs.  */
1018
1019 static bool
1020 find_bivs (struct ivopts_data *data)
1021 {
1022   gimple phi;
1023   tree step, type, base;
1024   bool found = false;
1025   struct loop *loop = data->current_loop;
1026   gimple_stmt_iterator psi;
1027
1028   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1029     {
1030       phi = gsi_stmt (psi);
1031
1032       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1033         continue;
1034
1035       step = determine_biv_step (phi);
1036       if (!step)
1037         continue;
1038
1039       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1040       base = expand_simple_operations (base);
1041       if (contains_abnormal_ssa_name_p (base)
1042           || contains_abnormal_ssa_name_p (step))
1043         continue;
1044
1045       type = TREE_TYPE (PHI_RESULT (phi));
1046       base = fold_convert (type, base);
1047       if (step)
1048         {
1049           if (POINTER_TYPE_P (type))
1050             step = convert_to_ptrofftype (step);
1051           else
1052             step = fold_convert (type, step);
1053         }
1054
1055       set_iv (data, PHI_RESULT (phi), base, step);
1056       found = true;
1057     }
1058
1059   return found;
1060 }
1061
1062 /* Marks basic ivs.  */
1063
1064 static void
1065 mark_bivs (struct ivopts_data *data)
1066 {
1067   gimple phi;
1068   tree var;
1069   struct iv *iv, *incr_iv;
1070   struct loop *loop = data->current_loop;
1071   basic_block incr_bb;
1072   gimple_stmt_iterator psi;
1073
1074   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1075     {
1076       phi = gsi_stmt (psi);
1077
1078       iv = get_iv (data, PHI_RESULT (phi));
1079       if (!iv)
1080         continue;
1081
1082       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1083       incr_iv = get_iv (data, var);
1084       if (!incr_iv)
1085         continue;
1086
1087       /* If the increment is in the subloop, ignore it.  */
1088       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1089       if (incr_bb->loop_father != data->current_loop
1090           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1091         continue;
1092
1093       iv->biv_p = true;
1094       incr_iv->biv_p = true;
1095     }
1096 }
1097
1098 /* Checks whether STMT defines a linear induction variable and stores its
1099    parameters to IV.  */
1100
1101 static bool
1102 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1103 {
1104   tree lhs;
1105   struct loop *loop = data->current_loop;
1106
1107   iv->base = NULL_TREE;
1108   iv->step = NULL_TREE;
1109
1110   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1111     return false;
1112
1113   lhs = gimple_assign_lhs (stmt);
1114   if (TREE_CODE (lhs) != SSA_NAME)
1115     return false;
1116
1117   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1118     return false;
1119   iv->base = expand_simple_operations (iv->base);
1120
1121   if (contains_abnormal_ssa_name_p (iv->base)
1122       || contains_abnormal_ssa_name_p (iv->step))
1123     return false;
1124
1125   /* If STMT could throw, then do not consider STMT as defining a GIV.
1126      While this will suppress optimizations, we can not safely delete this
1127      GIV and associated statements, even if it appears it is not used.  */
1128   if (stmt_could_throw_p (stmt))
1129     return false;
1130
1131   return true;
1132 }
1133
1134 /* Finds general ivs in statement STMT.  */
1135
1136 static void
1137 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1138 {
1139   affine_iv iv;
1140
1141   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1142     return;
1143
1144   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1145 }
1146
1147 /* Finds general ivs in basic block BB.  */
1148
1149 static void
1150 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1151 {
1152   gimple_stmt_iterator bsi;
1153
1154   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1155     find_givs_in_stmt (data, gsi_stmt (bsi));
1156 }
1157
1158 /* Finds general ivs.  */
1159
1160 static void
1161 find_givs (struct ivopts_data *data)
1162 {
1163   struct loop *loop = data->current_loop;
1164   basic_block *body = get_loop_body_in_dom_order (loop);
1165   unsigned i;
1166
1167   for (i = 0; i < loop->num_nodes; i++)
1168     find_givs_in_bb (data, body[i]);
1169   free (body);
1170 }
1171
1172 /* For each ssa name defined in LOOP determines whether it is an induction
1173    variable and if so, its initial value and step.  */
1174
1175 static bool
1176 find_induction_variables (struct ivopts_data *data)
1177 {
1178   unsigned i;
1179   bitmap_iterator bi;
1180
1181   if (!find_bivs (data))
1182     return false;
1183
1184   find_givs (data);
1185   mark_bivs (data);
1186
1187   if (dump_file && (dump_flags & TDF_DETAILS))
1188     {
1189       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1190
1191       if (niter)
1192         {
1193           fprintf (dump_file, "  number of iterations ");
1194           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1195           if (!integer_zerop (niter->may_be_zero))
1196             {
1197               fprintf (dump_file, "; zero if ");
1198               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1199             }
1200           fprintf (dump_file, "\n\n");
1201         };
1202
1203       fprintf (dump_file, "Induction variables:\n\n");
1204
1205       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1206         {
1207           if (ver_info (data, i)->iv)
1208             dump_iv (dump_file, ver_info (data, i)->iv);
1209         }
1210     }
1211
1212   return true;
1213 }
1214
1215 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1216
1217 static struct iv_use *
1218 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1219             gimple stmt, enum use_type use_type)
1220 {
1221   struct iv_use *use = XCNEW (struct iv_use);
1222
1223   use->id = n_iv_uses (data);
1224   use->type = use_type;
1225   use->iv = iv;
1226   use->stmt = stmt;
1227   use->op_p = use_p;
1228   use->related_cands = BITMAP_ALLOC (NULL);
1229
1230   /* To avoid showing ssa name in the dumps, if it was not reset by the
1231      caller.  */
1232   iv->ssa_name = NULL_TREE;
1233
1234   if (dump_file && (dump_flags & TDF_DETAILS))
1235     dump_use (dump_file, use);
1236
1237   data->iv_uses.safe_push (use);
1238
1239   return use;
1240 }
1241
1242 /* Checks whether OP is a loop-level invariant and if so, records it.
1243    NONLINEAR_USE is true if the invariant is used in a way we do not
1244    handle specially.  */
1245
1246 static void
1247 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1248 {
1249   basic_block bb;
1250   struct version_info *info;
1251
1252   if (TREE_CODE (op) != SSA_NAME
1253       || virtual_operand_p (op))
1254     return;
1255
1256   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1257   if (bb
1258       && flow_bb_inside_loop_p (data->current_loop, bb))
1259     return;
1260
1261   info = name_info (data, op);
1262   info->name = op;
1263   info->has_nonlin_use |= nonlinear_use;
1264   if (!info->inv_id)
1265     info->inv_id = ++data->max_inv_id;
1266   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1267 }
1268
1269 /* Checks whether the use OP is interesting and if so, records it.  */
1270
1271 static struct iv_use *
1272 find_interesting_uses_op (struct ivopts_data *data, tree op)
1273 {
1274   struct iv *iv;
1275   struct iv *civ;
1276   gimple stmt;
1277   struct iv_use *use;
1278
1279   if (TREE_CODE (op) != SSA_NAME)
1280     return NULL;
1281
1282   iv = get_iv (data, op);
1283   if (!iv)
1284     return NULL;
1285
1286   if (iv->have_use_for)
1287     {
1288       use = iv_use (data, iv->use_id);
1289
1290       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1291       return use;
1292     }
1293
1294   if (integer_zerop (iv->step))
1295     {
1296       record_invariant (data, op, true);
1297       return NULL;
1298     }
1299   iv->have_use_for = true;
1300
1301   civ = XNEW (struct iv);
1302   *civ = *iv;
1303
1304   stmt = SSA_NAME_DEF_STMT (op);
1305   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1306               || is_gimple_assign (stmt));
1307
1308   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1309   iv->use_id = use->id;
1310
1311   return use;
1312 }
1313
1314 /* Given a condition in statement STMT, checks whether it is a compare
1315    of an induction variable and an invariant.  If this is the case,
1316    CONTROL_VAR is set to location of the iv, BOUND to the location of
1317    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1318    induction variable descriptions, and true is returned.  If this is not
1319    the case, CONTROL_VAR and BOUND are set to the arguments of the
1320    condition and false is returned.  */
1321
1322 static bool
1323 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1324                        tree **control_var, tree **bound,
1325                        struct iv **iv_var, struct iv **iv_bound)
1326 {
1327   /* The objects returned when COND has constant operands.  */
1328   static struct iv const_iv;
1329   static tree zero;
1330   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1331   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1332   bool ret = false;
1333
1334   if (gimple_code (stmt) == GIMPLE_COND)
1335     {
1336       op0 = gimple_cond_lhs_ptr (stmt);
1337       op1 = gimple_cond_rhs_ptr (stmt);
1338     }
1339   else
1340     {
1341       op0 = gimple_assign_rhs1_ptr (stmt);
1342       op1 = gimple_assign_rhs2_ptr (stmt);
1343     }
1344
1345   zero = integer_zero_node;
1346   const_iv.step = integer_zero_node;
1347
1348   if (TREE_CODE (*op0) == SSA_NAME)
1349     iv0 = get_iv (data, *op0);
1350   if (TREE_CODE (*op1) == SSA_NAME)
1351     iv1 = get_iv (data, *op1);
1352
1353   /* Exactly one of the compared values must be an iv, and the other one must
1354      be an invariant.  */
1355   if (!iv0 || !iv1)
1356     goto end;
1357
1358   if (integer_zerop (iv0->step))
1359     {
1360       /* Control variable may be on the other side.  */
1361       tmp_op = op0; op0 = op1; op1 = tmp_op;
1362       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1363     }
1364   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1365
1366 end:
1367   if (control_var)
1368     *control_var = op0;;
1369   if (iv_var)
1370     *iv_var = iv0;;
1371   if (bound)
1372     *bound = op1;
1373   if (iv_bound)
1374     *iv_bound = iv1;
1375
1376   return ret;
1377 }
1378
1379 /* Checks whether the condition in STMT is interesting and if so,
1380    records it.  */
1381
1382 static void
1383 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1384 {
1385   tree *var_p, *bound_p;
1386   struct iv *var_iv, *civ;
1387
1388   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1389     {
1390       find_interesting_uses_op (data, *var_p);
1391       find_interesting_uses_op (data, *bound_p);
1392       return;
1393     }
1394
1395   civ = XNEW (struct iv);
1396   *civ = *var_iv;
1397   record_use (data, NULL, civ, stmt, USE_COMPARE);
1398 }
1399
1400 /* Returns the outermost loop EXPR is obviously invariant in
1401    relative to the loop LOOP, i.e. if all its operands are defined
1402    outside of the returned loop.  Returns NULL if EXPR is not
1403    even obviously invariant in LOOP.  */
1404
1405 struct loop *
1406 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1407 {
1408   basic_block def_bb;
1409   unsigned i, len;
1410
1411   if (is_gimple_min_invariant (expr))
1412     return current_loops->tree_root;
1413
1414   if (TREE_CODE (expr) == SSA_NAME)
1415     {
1416       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1417       if (def_bb)
1418         {
1419           if (flow_bb_inside_loop_p (loop, def_bb))
1420             return NULL;
1421           return superloop_at_depth (loop,
1422                                      loop_depth (def_bb->loop_father) + 1);
1423         }
1424
1425       return current_loops->tree_root;
1426     }
1427
1428   if (!EXPR_P (expr))
1429     return NULL;
1430
1431   unsigned maxdepth = 0;
1432   len = TREE_OPERAND_LENGTH (expr);
1433   for (i = 0; i < len; i++)
1434     {
1435       struct loop *ivloop;
1436       if (!TREE_OPERAND (expr, i))
1437         continue;
1438
1439       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1440       if (!ivloop)
1441         return NULL;
1442       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1443     }
1444
1445   return superloop_at_depth (loop, maxdepth);
1446 }
1447
1448 /* Returns true if expression EXPR is obviously invariant in LOOP,
1449    i.e. if all its operands are defined outside of the LOOP.  LOOP
1450    should not be the function body.  */
1451
1452 bool
1453 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1454 {
1455   basic_block def_bb;
1456   unsigned i, len;
1457
1458   gcc_assert (loop_depth (loop) > 0);
1459
1460   if (is_gimple_min_invariant (expr))
1461     return true;
1462
1463   if (TREE_CODE (expr) == SSA_NAME)
1464     {
1465       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1466       if (def_bb
1467           && flow_bb_inside_loop_p (loop, def_bb))
1468         return false;
1469
1470       return true;
1471     }
1472
1473   if (!EXPR_P (expr))
1474     return false;
1475
1476   len = TREE_OPERAND_LENGTH (expr);
1477   for (i = 0; i < len; i++)
1478     if (TREE_OPERAND (expr, i)
1479         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1480       return false;
1481
1482   return true;
1483 }
1484
1485 /* Cumulates the steps of indices into DATA and replaces their values with the
1486    initial ones.  Returns false when the value of the index cannot be determined.
1487    Callback for for_each_index.  */
1488
1489 struct ifs_ivopts_data
1490 {
1491   struct ivopts_data *ivopts_data;
1492   gimple stmt;
1493   tree step;
1494 };
1495
1496 static bool
1497 idx_find_step (tree base, tree *idx, void *data)
1498 {
1499   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1500   struct iv *iv;
1501   tree step, iv_base, iv_step, lbound, off;
1502   struct loop *loop = dta->ivopts_data->current_loop;
1503
1504   /* If base is a component ref, require that the offset of the reference
1505      be invariant.  */
1506   if (TREE_CODE (base) == COMPONENT_REF)
1507     {
1508       off = component_ref_field_offset (base);
1509       return expr_invariant_in_loop_p (loop, off);
1510     }
1511
1512   /* If base is array, first check whether we will be able to move the
1513      reference out of the loop (in order to take its address in strength
1514      reduction).  In order for this to work we need both lower bound
1515      and step to be loop invariants.  */
1516   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1517     {
1518       /* Moreover, for a range, the size needs to be invariant as well.  */
1519       if (TREE_CODE (base) == ARRAY_RANGE_REF
1520           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1521         return false;
1522
1523       step = array_ref_element_size (base);
1524       lbound = array_ref_low_bound (base);
1525
1526       if (!expr_invariant_in_loop_p (loop, step)
1527           || !expr_invariant_in_loop_p (loop, lbound))
1528         return false;
1529     }
1530
1531   if (TREE_CODE (*idx) != SSA_NAME)
1532     return true;
1533
1534   iv = get_iv (dta->ivopts_data, *idx);
1535   if (!iv)
1536     return false;
1537
1538   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1539           *&x[0], which is not folded and does not trigger the
1540           ARRAY_REF path below.  */
1541   *idx = iv->base;
1542
1543   if (integer_zerop (iv->step))
1544     return true;
1545
1546   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1547     {
1548       step = array_ref_element_size (base);
1549
1550       /* We only handle addresses whose step is an integer constant.  */
1551       if (TREE_CODE (step) != INTEGER_CST)
1552         return false;
1553     }
1554   else
1555     /* The step for pointer arithmetics already is 1 byte.  */
1556     step = size_one_node;
1557
1558   iv_base = iv->base;
1559   iv_step = iv->step;
1560   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1561                             sizetype, &iv_base, &iv_step, dta->stmt,
1562                             false))
1563     {
1564       /* The index might wrap.  */
1565       return false;
1566     }
1567
1568   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1569   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1570
1571   return true;
1572 }
1573
1574 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1575    object is passed to it in DATA.  */
1576
1577 static bool
1578 idx_record_use (tree base, tree *idx,
1579                 void *vdata)
1580 {
1581   struct ivopts_data *data = (struct ivopts_data *) vdata;
1582   find_interesting_uses_op (data, *idx);
1583   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1584     {
1585       find_interesting_uses_op (data, array_ref_element_size (base));
1586       find_interesting_uses_op (data, array_ref_low_bound (base));
1587     }
1588   return true;
1589 }
1590
1591 /* If we can prove that TOP = cst * BOT for some constant cst,
1592    store cst to MUL and return true.  Otherwise return false.
1593    The returned value is always sign-extended, regardless of the
1594    signedness of TOP and BOT.  */
1595
1596 static bool
1597 constant_multiple_of (tree top, tree bot, double_int *mul)
1598 {
1599   tree mby;
1600   enum tree_code code;
1601   double_int res, p0, p1;
1602   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1603
1604   STRIP_NOPS (top);
1605   STRIP_NOPS (bot);
1606
1607   if (operand_equal_p (top, bot, 0))
1608     {
1609       *mul = double_int_one;
1610       return true;
1611     }
1612
1613   code = TREE_CODE (top);
1614   switch (code)
1615     {
1616     case MULT_EXPR:
1617       mby = TREE_OPERAND (top, 1);
1618       if (TREE_CODE (mby) != INTEGER_CST)
1619         return false;
1620
1621       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1622         return false;
1623
1624       *mul = (res * tree_to_double_int (mby)).sext (precision);
1625       return true;
1626
1627     case PLUS_EXPR:
1628     case MINUS_EXPR:
1629       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1630           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1631         return false;
1632
1633       if (code == MINUS_EXPR)
1634         p1 = -p1;
1635       *mul = (p0 + p1).sext (precision);
1636       return true;
1637
1638     case INTEGER_CST:
1639       if (TREE_CODE (bot) != INTEGER_CST)
1640         return false;
1641
1642       p0 = tree_to_double_int (top).sext (precision);
1643       p1 = tree_to_double_int (bot).sext (precision);
1644       if (p1.is_zero ())
1645         return false;
1646       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1647       return res.is_zero ();
1648
1649     default:
1650       return false;
1651     }
1652 }
1653
1654 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1655
1656 static bool
1657 may_be_unaligned_p (tree ref, tree step)
1658 {
1659   tree base;
1660   tree base_type;
1661   HOST_WIDE_INT bitsize;
1662   HOST_WIDE_INT bitpos;
1663   tree toffset;
1664   enum machine_mode mode;
1665   int unsignedp, volatilep;
1666   unsigned base_align;
1667
1668   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1669      thus they are not misaligned.  */
1670   if (TREE_CODE (ref) == TARGET_MEM_REF)
1671     return false;
1672
1673   /* The test below is basically copy of what expr.c:normal_inner_ref
1674      does to check whether the object must be loaded by parts when
1675      STRICT_ALIGNMENT is true.  */
1676   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1677                               &unsignedp, &volatilep, true);
1678   base_type = TREE_TYPE (base);
1679   base_align = get_object_alignment (base);
1680   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1681
1682   if (mode != BLKmode)
1683     {
1684       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1685
1686       if (base_align < mode_align
1687           || (bitpos % mode_align) != 0
1688           || (bitpos % BITS_PER_UNIT) != 0)
1689         return true;
1690
1691       if (toffset
1692           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1693         return true;
1694
1695       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1696         return true;
1697     }
1698
1699   return false;
1700 }
1701
1702 /* Return true if EXPR may be non-addressable.   */
1703
1704 bool
1705 may_be_nonaddressable_p (tree expr)
1706 {
1707   switch (TREE_CODE (expr))
1708     {
1709     case TARGET_MEM_REF:
1710       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1711          target, thus they are always addressable.  */
1712       return false;
1713
1714     case COMPONENT_REF:
1715       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1716              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1717
1718     case VIEW_CONVERT_EXPR:
1719       /* This kind of view-conversions may wrap non-addressable objects
1720          and make them look addressable.  After some processing the
1721          non-addressability may be uncovered again, causing ADDR_EXPRs
1722          of inappropriate objects to be built.  */
1723       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1724           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1725         return true;
1726
1727       /* ... fall through ... */
1728
1729     case ARRAY_REF:
1730     case ARRAY_RANGE_REF:
1731       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1732
1733     CASE_CONVERT:
1734       return true;
1735
1736     default:
1737       break;
1738     }
1739
1740   return false;
1741 }
1742
1743 /* Finds addresses in *OP_P inside STMT.  */
1744
1745 static void
1746 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1747 {
1748   tree base = *op_p, step = size_zero_node;
1749   struct iv *civ;
1750   struct ifs_ivopts_data ifs_ivopts_data;
1751
1752   /* Do not play with volatile memory references.  A bit too conservative,
1753      perhaps, but safe.  */
1754   if (gimple_has_volatile_ops (stmt))
1755     goto fail;
1756
1757   /* Ignore bitfields for now.  Not really something terribly complicated
1758      to handle.  TODO.  */
1759   if (TREE_CODE (base) == BIT_FIELD_REF)
1760     goto fail;
1761
1762   base = unshare_expr (base);
1763
1764   if (TREE_CODE (base) == TARGET_MEM_REF)
1765     {
1766       tree type = build_pointer_type (TREE_TYPE (base));
1767       tree astep;
1768
1769       if (TMR_BASE (base)
1770           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1771         {
1772           civ = get_iv (data, TMR_BASE (base));
1773           if (!civ)
1774             goto fail;
1775
1776           TMR_BASE (base) = civ->base;
1777           step = civ->step;
1778         }
1779       if (TMR_INDEX2 (base)
1780           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1781         {
1782           civ = get_iv (data, TMR_INDEX2 (base));
1783           if (!civ)
1784             goto fail;
1785
1786           TMR_INDEX2 (base) = civ->base;
1787           step = civ->step;
1788         }
1789       if (TMR_INDEX (base)
1790           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1791         {
1792           civ = get_iv (data, TMR_INDEX (base));
1793           if (!civ)
1794             goto fail;
1795
1796           TMR_INDEX (base) = civ->base;
1797           astep = civ->step;
1798
1799           if (astep)
1800             {
1801               if (TMR_STEP (base))
1802                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1803
1804               step = fold_build2 (PLUS_EXPR, type, step, astep);
1805             }
1806         }
1807
1808       if (integer_zerop (step))
1809         goto fail;
1810       base = tree_mem_ref_addr (type, base);
1811     }
1812   else
1813     {
1814       ifs_ivopts_data.ivopts_data = data;
1815       ifs_ivopts_data.stmt = stmt;
1816       ifs_ivopts_data.step = size_zero_node;
1817       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1818           || integer_zerop (ifs_ivopts_data.step))
1819         goto fail;
1820       step = ifs_ivopts_data.step;
1821
1822       /* Check that the base expression is addressable.  This needs
1823          to be done after substituting bases of IVs into it.  */
1824       if (may_be_nonaddressable_p (base))
1825         goto fail;
1826
1827       /* Moreover, on strict alignment platforms, check that it is
1828          sufficiently aligned.  */
1829       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1830         goto fail;
1831
1832       base = build_fold_addr_expr (base);
1833
1834       /* Substituting bases of IVs into the base expression might
1835          have caused folding opportunities.  */
1836       if (TREE_CODE (base) == ADDR_EXPR)
1837         {
1838           tree *ref = &TREE_OPERAND (base, 0);
1839           while (handled_component_p (*ref))
1840             ref = &TREE_OPERAND (*ref, 0);
1841           if (TREE_CODE (*ref) == MEM_REF)
1842             {
1843               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1844                                       TREE_OPERAND (*ref, 0),
1845                                       TREE_OPERAND (*ref, 1));
1846               if (tem)
1847                 *ref = tem;
1848             }
1849         }
1850     }
1851
1852   civ = alloc_iv (base, step);
1853   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1854   return;
1855
1856 fail:
1857   for_each_index (op_p, idx_record_use, data);
1858 }
1859
1860 /* Finds and records invariants used in STMT.  */
1861
1862 static void
1863 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1864 {
1865   ssa_op_iter iter;
1866   use_operand_p use_p;
1867   tree op;
1868
1869   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1870     {
1871       op = USE_FROM_PTR (use_p);
1872       record_invariant (data, op, false);
1873     }
1874 }
1875
1876 /* Finds interesting uses of induction variables in the statement STMT.  */
1877
1878 static void
1879 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1880 {
1881   struct iv *iv;
1882   tree op, *lhs, *rhs;
1883   ssa_op_iter iter;
1884   use_operand_p use_p;
1885   enum tree_code code;
1886
1887   find_invariants_stmt (data, stmt);
1888
1889   if (gimple_code (stmt) == GIMPLE_COND)
1890     {
1891       find_interesting_uses_cond (data, stmt);
1892       return;
1893     }
1894
1895   if (is_gimple_assign (stmt))
1896     {
1897       lhs = gimple_assign_lhs_ptr (stmt);
1898       rhs = gimple_assign_rhs1_ptr (stmt);
1899
1900       if (TREE_CODE (*lhs) == SSA_NAME)
1901         {
1902           /* If the statement defines an induction variable, the uses are not
1903              interesting by themselves.  */
1904
1905           iv = get_iv (data, *lhs);
1906
1907           if (iv && !integer_zerop (iv->step))
1908             return;
1909         }
1910
1911       code = gimple_assign_rhs_code (stmt);
1912       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1913           && (REFERENCE_CLASS_P (*rhs)
1914               || is_gimple_val (*rhs)))
1915         {
1916           if (REFERENCE_CLASS_P (*rhs))
1917             find_interesting_uses_address (data, stmt, rhs);
1918           else
1919             find_interesting_uses_op (data, *rhs);
1920
1921           if (REFERENCE_CLASS_P (*lhs))
1922             find_interesting_uses_address (data, stmt, lhs);
1923           return;
1924         }
1925       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1926         {
1927           find_interesting_uses_cond (data, stmt);
1928           return;
1929         }
1930
1931       /* TODO -- we should also handle address uses of type
1932
1933          memory = call (whatever);
1934
1935          and
1936
1937          call (memory).  */
1938     }
1939
1940   if (gimple_code (stmt) == GIMPLE_PHI
1941       && gimple_bb (stmt) == data->current_loop->header)
1942     {
1943       iv = get_iv (data, PHI_RESULT (stmt));
1944
1945       if (iv && !integer_zerop (iv->step))
1946         return;
1947     }
1948
1949   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1950     {
1951       op = USE_FROM_PTR (use_p);
1952
1953       if (TREE_CODE (op) != SSA_NAME)
1954         continue;
1955
1956       iv = get_iv (data, op);
1957       if (!iv)
1958         continue;
1959
1960       find_interesting_uses_op (data, op);
1961     }
1962 }
1963
1964 /* Finds interesting uses of induction variables outside of loops
1965    on loop exit edge EXIT.  */
1966
1967 static void
1968 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1969 {
1970   gimple phi;
1971   gimple_stmt_iterator psi;
1972   tree def;
1973
1974   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1975     {
1976       phi = gsi_stmt (psi);
1977       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1978       if (!virtual_operand_p (def))
1979         find_interesting_uses_op (data, def);
1980     }
1981 }
1982
1983 /* Finds uses of the induction variables that are interesting.  */
1984
1985 static void
1986 find_interesting_uses (struct ivopts_data *data)
1987 {
1988   basic_block bb;
1989   gimple_stmt_iterator bsi;
1990   basic_block *body = get_loop_body (data->current_loop);
1991   unsigned i;
1992   struct version_info *info;
1993   edge e;
1994
1995   if (dump_file && (dump_flags & TDF_DETAILS))
1996     fprintf (dump_file, "Uses:\n\n");
1997
1998   for (i = 0; i < data->current_loop->num_nodes; i++)
1999     {
2000       edge_iterator ei;
2001       bb = body[i];
2002
2003       FOR_EACH_EDGE (e, ei, bb->succs)
2004         if (e->dest != EXIT_BLOCK_PTR
2005             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2006           find_interesting_uses_outside (data, e);
2007
2008       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2009         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2010       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2011         if (!is_gimple_debug (gsi_stmt (bsi)))
2012           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2013     }
2014
2015   if (dump_file && (dump_flags & TDF_DETAILS))
2016     {
2017       bitmap_iterator bi;
2018
2019       fprintf (dump_file, "\n");
2020
2021       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2022         {
2023           info = ver_info (data, i);
2024           if (info->inv_id)
2025             {
2026               fprintf (dump_file, "  ");
2027               print_generic_expr (dump_file, info->name, TDF_SLIM);
2028               fprintf (dump_file, " is invariant (%d)%s\n",
2029                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2030             }
2031         }
2032
2033       fprintf (dump_file, "\n");
2034     }
2035
2036   free (body);
2037 }
2038
2039 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2040    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2041    we are at the top-level of the processed address.  */
2042
2043 static tree
2044 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2045                 HOST_WIDE_INT *offset)
2046 {
2047   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2048   enum tree_code code;
2049   tree type, orig_type = TREE_TYPE (expr);
2050   HOST_WIDE_INT off0, off1, st;
2051   tree orig_expr = expr;
2052
2053   STRIP_NOPS (expr);
2054
2055   type = TREE_TYPE (expr);
2056   code = TREE_CODE (expr);
2057   *offset = 0;
2058
2059   switch (code)
2060     {
2061     case INTEGER_CST:
2062       if (!cst_and_fits_in_hwi (expr)
2063           || integer_zerop (expr))
2064         return orig_expr;
2065
2066       *offset = int_cst_value (expr);
2067       return build_int_cst (orig_type, 0);
2068
2069     case POINTER_PLUS_EXPR:
2070     case PLUS_EXPR:
2071     case MINUS_EXPR:
2072       op0 = TREE_OPERAND (expr, 0);
2073       op1 = TREE_OPERAND (expr, 1);
2074
2075       op0 = strip_offset_1 (op0, false, false, &off0);
2076       op1 = strip_offset_1 (op1, false, false, &off1);
2077
2078       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2079       if (op0 == TREE_OPERAND (expr, 0)
2080           && op1 == TREE_OPERAND (expr, 1))
2081         return orig_expr;
2082
2083       if (integer_zerop (op1))
2084         expr = op0;
2085       else if (integer_zerop (op0))
2086         {
2087           if (code == MINUS_EXPR)
2088             expr = fold_build1 (NEGATE_EXPR, type, op1);
2089           else
2090             expr = op1;
2091         }
2092       else
2093         expr = fold_build2 (code, type, op0, op1);
2094
2095       return fold_convert (orig_type, expr);
2096
2097     case MULT_EXPR:
2098       op1 = TREE_OPERAND (expr, 1);
2099       if (!cst_and_fits_in_hwi (op1))
2100         return orig_expr;
2101
2102       op0 = TREE_OPERAND (expr, 0);
2103       op0 = strip_offset_1 (op0, false, false, &off0);
2104       if (op0 == TREE_OPERAND (expr, 0))
2105         return orig_expr;
2106
2107       *offset = off0 * int_cst_value (op1);
2108       if (integer_zerop (op0))
2109         expr = op0;
2110       else
2111         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2112
2113       return fold_convert (orig_type, expr);
2114
2115     case ARRAY_REF:
2116     case ARRAY_RANGE_REF:
2117       if (!inside_addr)
2118         return orig_expr;
2119
2120       step = array_ref_element_size (expr);
2121       if (!cst_and_fits_in_hwi (step))
2122         break;
2123
2124       st = int_cst_value (step);
2125       op1 = TREE_OPERAND (expr, 1);
2126       op1 = strip_offset_1 (op1, false, false, &off1);
2127       *offset = off1 * st;
2128
2129       if (top_compref
2130           && integer_zerop (op1))
2131         {
2132           /* Strip the component reference completely.  */
2133           op0 = TREE_OPERAND (expr, 0);
2134           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2135           *offset += off0;
2136           return op0;
2137         }
2138       break;
2139
2140     case COMPONENT_REF:
2141       {
2142         tree field;
2143
2144         if (!inside_addr)
2145           return orig_expr;
2146
2147         tmp = component_ref_field_offset (expr);
2148         field = TREE_OPERAND (expr, 1);
2149         if (top_compref
2150             && cst_and_fits_in_hwi (tmp)
2151             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2152           {
2153             HOST_WIDE_INT boffset, abs_off;
2154
2155             /* Strip the component reference completely.  */
2156             op0 = TREE_OPERAND (expr, 0);
2157             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2158             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2159             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2160             if (boffset < 0)
2161               abs_off = -abs_off;
2162
2163             *offset = off0 + int_cst_value (tmp) + abs_off;
2164             return op0;
2165           }
2166       }
2167       break;
2168
2169     case ADDR_EXPR:
2170       op0 = TREE_OPERAND (expr, 0);
2171       op0 = strip_offset_1 (op0, true, true, &off0);
2172       *offset += off0;
2173
2174       if (op0 == TREE_OPERAND (expr, 0))
2175         return orig_expr;
2176
2177       expr = build_fold_addr_expr (op0);
2178       return fold_convert (orig_type, expr);
2179
2180     case MEM_REF:
2181       /* ???  Offset operand?  */
2182       inside_addr = false;
2183       break;
2184
2185     default:
2186       return orig_expr;
2187     }
2188
2189   /* Default handling of expressions for that we want to recurse into
2190      the first operand.  */
2191   op0 = TREE_OPERAND (expr, 0);
2192   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2193   *offset += off0;
2194
2195   if (op0 == TREE_OPERAND (expr, 0)
2196       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2197     return orig_expr;
2198
2199   expr = copy_node (expr);
2200   TREE_OPERAND (expr, 0) = op0;
2201   if (op1)
2202     TREE_OPERAND (expr, 1) = op1;
2203
2204   /* Inside address, we might strip the top level component references,
2205      thus changing type of the expression.  Handling of ADDR_EXPR
2206      will fix that.  */
2207   expr = fold_convert (orig_type, expr);
2208
2209   return expr;
2210 }
2211
2212 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2213
2214 static tree
2215 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2216 {
2217   HOST_WIDE_INT off;
2218   tree core = strip_offset_1 (expr, false, false, &off);
2219   *offset = off;
2220   return core;
2221 }
2222
2223 /* Returns variant of TYPE that can be used as base for different uses.
2224    We return unsigned type with the same precision, which avoids problems
2225    with overflows.  */
2226
2227 static tree
2228 generic_type_for (tree type)
2229 {
2230   if (POINTER_TYPE_P (type))
2231     return unsigned_type_for (type);
2232
2233   if (TYPE_UNSIGNED (type))
2234     return type;
2235
2236   return unsigned_type_for (type);
2237 }
2238
2239 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2240    the bitmap to that we should store it.  */
2241
2242 static struct ivopts_data *fd_ivopts_data;
2243 static tree
2244 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2245 {
2246   bitmap *depends_on = (bitmap *) data;
2247   struct version_info *info;
2248
2249   if (TREE_CODE (*expr_p) != SSA_NAME)
2250     return NULL_TREE;
2251   info = name_info (fd_ivopts_data, *expr_p);
2252
2253   if (!info->inv_id || info->has_nonlin_use)
2254     return NULL_TREE;
2255
2256   if (!*depends_on)
2257     *depends_on = BITMAP_ALLOC (NULL);
2258   bitmap_set_bit (*depends_on, info->inv_id);
2259
2260   return NULL_TREE;
2261 }
2262
2263 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2264    position to POS.  If USE is not NULL, the candidate is set as related to
2265    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2266    replacement of the final value of the iv by a direct computation.  */
2267
2268 static struct iv_cand *
2269 add_candidate_1 (struct ivopts_data *data,
2270                  tree base, tree step, bool important, enum iv_position pos,
2271                  struct iv_use *use, gimple incremented_at)
2272 {
2273   unsigned i;
2274   struct iv_cand *cand = NULL;
2275   tree type, orig_type;
2276
2277   /* For non-original variables, make sure their values are computed in a type
2278      that does not invoke undefined behavior on overflows (since in general,
2279      we cannot prove that these induction variables are non-wrapping).  */
2280   if (pos != IP_ORIGINAL)
2281     {
2282       orig_type = TREE_TYPE (base);
2283       type = generic_type_for (orig_type);
2284       if (type != orig_type)
2285         {
2286           base = fold_convert (type, base);
2287           step = fold_convert (type, step);
2288         }
2289     }
2290
2291   for (i = 0; i < n_iv_cands (data); i++)
2292     {
2293       cand = iv_cand (data, i);
2294
2295       if (cand->pos != pos)
2296         continue;
2297
2298       if (cand->incremented_at != incremented_at
2299           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2300               && cand->ainc_use != use))
2301         continue;
2302
2303       if (!cand->iv)
2304         {
2305           if (!base && !step)
2306             break;
2307
2308           continue;
2309         }
2310
2311       if (!base && !step)
2312         continue;
2313
2314       if (operand_equal_p (base, cand->iv->base, 0)
2315           && operand_equal_p (step, cand->iv->step, 0)
2316           && (TYPE_PRECISION (TREE_TYPE (base))
2317               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2318         break;
2319     }
2320
2321   if (i == n_iv_cands (data))
2322     {
2323       cand = XCNEW (struct iv_cand);
2324       cand->id = i;
2325
2326       if (!base && !step)
2327         cand->iv = NULL;
2328       else
2329         cand->iv = alloc_iv (base, step);
2330
2331       cand->pos = pos;
2332       if (pos != IP_ORIGINAL && cand->iv)
2333         {
2334           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2335           cand->var_after = cand->var_before;
2336         }
2337       cand->important = important;
2338       cand->incremented_at = incremented_at;
2339       data->iv_candidates.safe_push (cand);
2340
2341       if (step
2342           && TREE_CODE (step) != INTEGER_CST)
2343         {
2344           fd_ivopts_data = data;
2345           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2346         }
2347
2348       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2349         cand->ainc_use = use;
2350       else
2351         cand->ainc_use = NULL;
2352
2353       if (dump_file && (dump_flags & TDF_DETAILS))
2354         dump_cand (dump_file, cand);
2355     }
2356
2357   if (important && !cand->important)
2358     {
2359       cand->important = true;
2360       if (dump_file && (dump_flags & TDF_DETAILS))
2361         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2362     }
2363
2364   if (use)
2365     {
2366       bitmap_set_bit (use->related_cands, i);
2367       if (dump_file && (dump_flags & TDF_DETAILS))
2368         fprintf (dump_file, "Candidate %d is related to use %d\n",
2369                  cand->id, use->id);
2370     }
2371
2372   return cand;
2373 }
2374
2375 /* Returns true if incrementing the induction variable at the end of the LOOP
2376    is allowed.
2377
2378    The purpose is to avoid splitting latch edge with a biv increment, thus
2379    creating a jump, possibly confusing other optimization passes and leaving
2380    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2381    is not available (so we do not have a better alternative), or if the latch
2382    edge is already nonempty.  */
2383
2384 static bool
2385 allow_ip_end_pos_p (struct loop *loop)
2386 {
2387   if (!ip_normal_pos (loop))
2388     return true;
2389
2390   if (!empty_block_p (ip_end_pos (loop)))
2391     return true;
2392
2393   return false;
2394 }
2395
2396 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2397    Important field is set to IMPORTANT.  */
2398
2399 static void
2400 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2401                         bool important, struct iv_use *use)
2402 {
2403   basic_block use_bb = gimple_bb (use->stmt);
2404   enum machine_mode mem_mode;
2405   unsigned HOST_WIDE_INT cstepi;
2406
2407   /* If we insert the increment in any position other than the standard
2408      ones, we must ensure that it is incremented once per iteration.
2409      It must not be in an inner nested loop, or one side of an if
2410      statement.  */
2411   if (use_bb->loop_father != data->current_loop
2412       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2413       || stmt_could_throw_p (use->stmt)
2414       || !cst_and_fits_in_hwi (step))
2415     return;
2416
2417   cstepi = int_cst_value (step);
2418
2419   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2420   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2421         || USE_STORE_PRE_INCREMENT (mem_mode))
2422        && GET_MODE_SIZE (mem_mode) == cstepi)
2423       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2424            || USE_STORE_PRE_DECREMENT (mem_mode))
2425           && GET_MODE_SIZE (mem_mode) == -cstepi))
2426     {
2427       enum tree_code code = MINUS_EXPR;
2428       tree new_base;
2429       tree new_step = step;
2430
2431       if (POINTER_TYPE_P (TREE_TYPE (base)))
2432         {
2433           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2434           code = POINTER_PLUS_EXPR;
2435         }
2436       else
2437         new_step = fold_convert (TREE_TYPE (base), new_step);
2438       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2439       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2440                        use->stmt);
2441     }
2442   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2443         || USE_STORE_POST_INCREMENT (mem_mode))
2444        && GET_MODE_SIZE (mem_mode) == cstepi)
2445       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2446            || USE_STORE_POST_DECREMENT (mem_mode))
2447           && GET_MODE_SIZE (mem_mode) == -cstepi))
2448     {
2449       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2450                        use->stmt);
2451     }
2452 }
2453
2454 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2455    position to POS.  If USE is not NULL, the candidate is set as related to
2456    it.  The candidate computation is scheduled on all available positions.  */
2457
2458 static void
2459 add_candidate (struct ivopts_data *data,
2460                tree base, tree step, bool important, struct iv_use *use)
2461 {
2462   if (ip_normal_pos (data->current_loop))
2463     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2464   if (ip_end_pos (data->current_loop)
2465       && allow_ip_end_pos_p (data->current_loop))
2466     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2467
2468   if (use != NULL && use->type == USE_ADDRESS)
2469     add_autoinc_candidates (data, base, step, important, use);
2470 }
2471
2472 /* Adds standard iv candidates.  */
2473
2474 static void
2475 add_standard_iv_candidates (struct ivopts_data *data)
2476 {
2477   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2478
2479   /* The same for a double-integer type if it is still fast enough.  */
2480   if (TYPE_PRECISION
2481         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2482       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2483     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2484                    build_int_cst (long_integer_type_node, 1), true, NULL);
2485
2486   /* The same for a double-integer type if it is still fast enough.  */
2487   if (TYPE_PRECISION
2488         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2489       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2490     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2491                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2492 }
2493
2494
2495 /* Adds candidates bases on the old induction variable IV.  */
2496
2497 static void
2498 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2499 {
2500   gimple phi;
2501   tree def;
2502   struct iv_cand *cand;
2503
2504   add_candidate (data, iv->base, iv->step, true, NULL);
2505
2506   /* The same, but with initial value zero.  */
2507   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2508     add_candidate (data, size_int (0), iv->step, true, NULL);
2509   else
2510     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2511                    iv->step, true, NULL);
2512
2513   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2514   if (gimple_code (phi) == GIMPLE_PHI)
2515     {
2516       /* Additionally record the possibility of leaving the original iv
2517          untouched.  */
2518       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2519       cand = add_candidate_1 (data,
2520                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2521                               SSA_NAME_DEF_STMT (def));
2522       cand->var_before = iv->ssa_name;
2523       cand->var_after = def;
2524     }
2525 }
2526
2527 /* Adds candidates based on the old induction variables.  */
2528
2529 static void
2530 add_old_ivs_candidates (struct ivopts_data *data)
2531 {
2532   unsigned i;
2533   struct iv *iv;
2534   bitmap_iterator bi;
2535
2536   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2537     {
2538       iv = ver_info (data, i)->iv;
2539       if (iv && iv->biv_p && !integer_zerop (iv->step))
2540         add_old_iv_candidates (data, iv);
2541     }
2542 }
2543
2544 /* Adds candidates based on the value of the induction variable IV and USE.  */
2545
2546 static void
2547 add_iv_value_candidates (struct ivopts_data *data,
2548                          struct iv *iv, struct iv_use *use)
2549 {
2550   unsigned HOST_WIDE_INT offset;
2551   tree base;
2552   tree basetype;
2553
2554   add_candidate (data, iv->base, iv->step, false, use);
2555
2556   /* The same, but with initial value zero.  Make such variable important,
2557      since it is generic enough so that possibly many uses may be based
2558      on it.  */
2559   basetype = TREE_TYPE (iv->base);
2560   if (POINTER_TYPE_P (basetype))
2561     basetype = sizetype;
2562   add_candidate (data, build_int_cst (basetype, 0),
2563                  iv->step, true, use);
2564
2565   /* Third, try removing the constant offset.  Make sure to even
2566      add a candidate for &a[0] vs. (T *)&a.  */
2567   base = strip_offset (iv->base, &offset);
2568   if (offset
2569       || base != iv->base)
2570     add_candidate (data, base, iv->step, false, use);
2571 }
2572
2573 /* Adds candidates based on the uses.  */
2574
2575 static void
2576 add_derived_ivs_candidates (struct ivopts_data *data)
2577 {
2578   unsigned i;
2579
2580   for (i = 0; i < n_iv_uses (data); i++)
2581     {
2582       struct iv_use *use = iv_use (data, i);
2583
2584       if (!use)
2585         continue;
2586
2587       switch (use->type)
2588         {
2589         case USE_NONLINEAR_EXPR:
2590         case USE_COMPARE:
2591         case USE_ADDRESS:
2592           /* Just add the ivs based on the value of the iv used here.  */
2593           add_iv_value_candidates (data, use->iv, use);
2594           break;
2595
2596         default:
2597           gcc_unreachable ();
2598         }
2599     }
2600 }
2601
2602 /* Record important candidates and add them to related_cands bitmaps
2603    if needed.  */
2604
2605 static void
2606 record_important_candidates (struct ivopts_data *data)
2607 {
2608   unsigned i;
2609   struct iv_use *use;
2610
2611   for (i = 0; i < n_iv_cands (data); i++)
2612     {
2613       struct iv_cand *cand = iv_cand (data, i);
2614
2615       if (cand->important)
2616         bitmap_set_bit (data->important_candidates, i);
2617     }
2618
2619   data->consider_all_candidates = (n_iv_cands (data)
2620                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2621
2622   if (data->consider_all_candidates)
2623     {
2624       /* We will not need "related_cands" bitmaps in this case,
2625          so release them to decrease peak memory consumption.  */
2626       for (i = 0; i < n_iv_uses (data); i++)
2627         {
2628           use = iv_use (data, i);
2629           BITMAP_FREE (use->related_cands);
2630         }
2631     }
2632   else
2633     {
2634       /* Add important candidates to the related_cands bitmaps.  */
2635       for (i = 0; i < n_iv_uses (data); i++)
2636         bitmap_ior_into (iv_use (data, i)->related_cands,
2637                          data->important_candidates);
2638     }
2639 }
2640
2641 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2642    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2643    we allocate a simple list to every use.  */
2644
2645 static void
2646 alloc_use_cost_map (struct ivopts_data *data)
2647 {
2648   unsigned i, size, s;
2649
2650   for (i = 0; i < n_iv_uses (data); i++)
2651     {
2652       struct iv_use *use = iv_use (data, i);
2653
2654       if (data->consider_all_candidates)
2655         size = n_iv_cands (data);
2656       else
2657         {
2658           s = bitmap_count_bits (use->related_cands);
2659
2660           /* Round up to the power of two, so that moduling by it is fast.  */
2661           size = s ? (1 << ceil_log2 (s)) : 1;
2662         }
2663
2664       use->n_map_members = size;
2665       use->cost_map = XCNEWVEC (struct cost_pair, size);
2666     }
2667 }
2668
2669 /* Returns description of computation cost of expression whose runtime
2670    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2671
2672 static comp_cost
2673 new_cost (unsigned runtime, unsigned complexity)
2674 {
2675   comp_cost cost;
2676
2677   cost.cost = runtime;
2678   cost.complexity = complexity;
2679
2680   return cost;
2681 }
2682
2683 /* Adds costs COST1 and COST2.  */
2684
2685 static comp_cost
2686 add_costs (comp_cost cost1, comp_cost cost2)
2687 {
2688   cost1.cost += cost2.cost;
2689   cost1.complexity += cost2.complexity;
2690
2691   return cost1;
2692 }
2693 /* Subtracts costs COST1 and COST2.  */
2694
2695 static comp_cost
2696 sub_costs (comp_cost cost1, comp_cost cost2)
2697 {
2698   cost1.cost -= cost2.cost;
2699   cost1.complexity -= cost2.complexity;
2700
2701   return cost1;
2702 }
2703
2704 /* Returns a negative number if COST1 < COST2, a positive number if
2705    COST1 > COST2, and 0 if COST1 = COST2.  */
2706
2707 static int
2708 compare_costs (comp_cost cost1, comp_cost cost2)
2709 {
2710   if (cost1.cost == cost2.cost)
2711     return cost1.complexity - cost2.complexity;
2712
2713   return cost1.cost - cost2.cost;
2714 }
2715
2716 /* Returns true if COST is infinite.  */
2717
2718 static bool
2719 infinite_cost_p (comp_cost cost)
2720 {
2721   return cost.cost == INFTY;
2722 }
2723
2724 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2725    on invariants DEPENDS_ON and that the value used in expressing it
2726    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2727
2728 static void
2729 set_use_iv_cost (struct ivopts_data *data,
2730                  struct iv_use *use, struct iv_cand *cand,
2731                  comp_cost cost, bitmap depends_on, tree value,
2732                  enum tree_code comp, int inv_expr_id)
2733 {
2734   unsigned i, s;
2735
2736   if (infinite_cost_p (cost))
2737     {
2738       BITMAP_FREE (depends_on);
2739       return;
2740     }
2741
2742   if (data->consider_all_candidates)
2743     {
2744       use->cost_map[cand->id].cand = cand;
2745       use->cost_map[cand->id].cost = cost;
2746       use->cost_map[cand->id].depends_on = depends_on;
2747       use->cost_map[cand->id].value = value;
2748       use->cost_map[cand->id].comp = comp;
2749       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2750       return;
2751     }
2752
2753   /* n_map_members is a power of two, so this computes modulo.  */
2754   s = cand->id & (use->n_map_members - 1);
2755   for (i = s; i < use->n_map_members; i++)
2756     if (!use->cost_map[i].cand)
2757       goto found;
2758   for (i = 0; i < s; i++)
2759     if (!use->cost_map[i].cand)
2760       goto found;
2761
2762   gcc_unreachable ();
2763
2764 found:
2765   use->cost_map[i].cand = cand;
2766   use->cost_map[i].cost = cost;
2767   use->cost_map[i].depends_on = depends_on;
2768   use->cost_map[i].value = value;
2769   use->cost_map[i].comp = comp;
2770   use->cost_map[i].inv_expr_id = inv_expr_id;
2771 }
2772
2773 /* Gets cost of (USE, CANDIDATE) pair.  */
2774
2775 static struct cost_pair *
2776 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2777                  struct iv_cand *cand)
2778 {
2779   unsigned i, s;
2780   struct cost_pair *ret;
2781
2782   if (!cand)
2783     return NULL;
2784
2785   if (data->consider_all_candidates)
2786     {
2787       ret = use->cost_map + cand->id;
2788       if (!ret->cand)
2789         return NULL;
2790
2791       return ret;
2792     }
2793
2794   /* n_map_members is a power of two, so this computes modulo.  */
2795   s = cand->id & (use->n_map_members - 1);
2796   for (i = s; i < use->n_map_members; i++)
2797     if (use->cost_map[i].cand == cand)
2798       return use->cost_map + i;
2799     else if (use->cost_map[i].cand == NULL)
2800       return NULL;
2801   for (i = 0; i < s; i++)
2802     if (use->cost_map[i].cand == cand)
2803       return use->cost_map + i;
2804     else if (use->cost_map[i].cand == NULL)
2805       return NULL;
2806
2807   return NULL;
2808 }
2809
2810 /* Returns estimate on cost of computing SEQ.  */
2811
2812 static unsigned
2813 seq_cost (rtx seq, bool speed)
2814 {
2815   unsigned cost = 0;
2816   rtx set;
2817
2818   for (; seq; seq = NEXT_INSN (seq))
2819     {
2820       set = single_set (seq);
2821       if (set)
2822         cost += set_src_cost (SET_SRC (set), speed);
2823       else
2824         cost++;
2825     }
2826
2827   return cost;
2828 }
2829
2830 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2831 static rtx
2832 produce_memory_decl_rtl (tree obj, int *regno)
2833 {
2834   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2835   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2836   rtx x;
2837
2838   gcc_assert (obj);
2839   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2840     {
2841       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2842       x = gen_rtx_SYMBOL_REF (address_mode, name);
2843       SET_SYMBOL_REF_DECL (x, obj);
2844       x = gen_rtx_MEM (DECL_MODE (obj), x);
2845       set_mem_addr_space (x, as);
2846       targetm.encode_section_info (obj, x, true);
2847     }
2848   else
2849     {
2850       x = gen_raw_REG (address_mode, (*regno)++);
2851       x = gen_rtx_MEM (DECL_MODE (obj), x);
2852       set_mem_addr_space (x, as);
2853     }
2854
2855   return x;
2856 }
2857
2858 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2859    walk_tree.  DATA contains the actual fake register number.  */
2860
2861 static tree
2862 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2863 {
2864   tree obj = NULL_TREE;
2865   rtx x = NULL_RTX;
2866   int *regno = (int *) data;
2867
2868   switch (TREE_CODE (*expr_p))
2869     {
2870     case ADDR_EXPR:
2871       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2872            handled_component_p (*expr_p);
2873            expr_p = &TREE_OPERAND (*expr_p, 0))
2874         continue;
2875       obj = *expr_p;
2876       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2877         x = produce_memory_decl_rtl (obj, regno);
2878       break;
2879
2880     case SSA_NAME:
2881       *ws = 0;
2882       obj = SSA_NAME_VAR (*expr_p);
2883       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2884       if (!obj)
2885         return NULL_TREE;
2886       if (!DECL_RTL_SET_P (obj))
2887         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2888       break;
2889
2890     case VAR_DECL:
2891     case PARM_DECL:
2892     case RESULT_DECL:
2893       *ws = 0;
2894       obj = *expr_p;
2895
2896       if (DECL_RTL_SET_P (obj))
2897         break;
2898
2899       if (DECL_MODE (obj) == BLKmode)
2900         x = produce_memory_decl_rtl (obj, regno);
2901       else
2902         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2903
2904       break;
2905
2906     default:
2907       break;
2908     }
2909
2910   if (x)
2911     {
2912       decl_rtl_to_reset.safe_push (obj);
2913       SET_DECL_RTL (obj, x);
2914     }
2915
2916   return NULL_TREE;
2917 }
2918
2919 /* Determines cost of the computation of EXPR.  */
2920
2921 static unsigned
2922 computation_cost (tree expr, bool speed)
2923 {
2924   rtx seq, rslt;
2925   tree type = TREE_TYPE (expr);
2926   unsigned cost;
2927   /* Avoid using hard regs in ways which may be unsupported.  */
2928   int regno = LAST_VIRTUAL_REGISTER + 1;
2929   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2930   enum node_frequency real_frequency = node->frequency;
2931
2932   node->frequency = NODE_FREQUENCY_NORMAL;
2933   crtl->maybe_hot_insn_p = speed;
2934   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2935   start_sequence ();
2936   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2937   seq = get_insns ();
2938   end_sequence ();
2939   default_rtl_profile ();
2940   node->frequency = real_frequency;
2941
2942   cost = seq_cost (seq, speed);
2943   if (MEM_P (rslt))
2944     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2945                           TYPE_ADDR_SPACE (type), speed);
2946   else if (!REG_P (rslt))
2947     cost += set_src_cost (rslt, speed);
2948
2949   return cost;
2950 }
2951
2952 /* Returns variable containing the value of candidate CAND at statement AT.  */
2953
2954 static tree
2955 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2956 {
2957   if (stmt_after_increment (loop, cand, stmt))
2958     return cand->var_after;
2959   else
2960     return cand->var_before;
2961 }
2962
2963 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2964    same precision that is at least as wide as the precision of TYPE, stores
2965    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2966    type of A and B.  */
2967
2968 static tree
2969 determine_common_wider_type (tree *a, tree *b)
2970 {
2971   tree wider_type = NULL;
2972   tree suba, subb;
2973   tree atype = TREE_TYPE (*a);
2974
2975   if (CONVERT_EXPR_P (*a))
2976     {
2977       suba = TREE_OPERAND (*a, 0);
2978       wider_type = TREE_TYPE (suba);
2979       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2980         return atype;
2981     }
2982   else
2983     return atype;
2984
2985   if (CONVERT_EXPR_P (*b))
2986     {
2987       subb = TREE_OPERAND (*b, 0);
2988       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2989         return atype;
2990     }
2991   else
2992     return atype;
2993
2994   *a = suba;
2995   *b = subb;
2996   return wider_type;
2997 }
2998
2999 /* Determines the expression by that USE is expressed from induction variable
3000    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3001    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3002
3003 static bool
3004 get_computation_aff (struct loop *loop,
3005                      struct iv_use *use, struct iv_cand *cand, gimple at,
3006                      struct affine_tree_combination *aff)
3007 {
3008   tree ubase = use->iv->base;
3009   tree ustep = use->iv->step;
3010   tree cbase = cand->iv->base;
3011   tree cstep = cand->iv->step, cstep_common;
3012   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3013   tree common_type, var;
3014   tree uutype;
3015   aff_tree cbase_aff, var_aff;
3016   double_int rat;
3017
3018   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3019     {
3020       /* We do not have a precision to express the values of use.  */
3021       return false;
3022     }
3023
3024   var = var_at_stmt (loop, cand, at);
3025   uutype = unsigned_type_for (utype);
3026
3027   /* If the conversion is not noop, perform it.  */
3028   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3029     {
3030       cstep = fold_convert (uutype, cstep);
3031       cbase = fold_convert (uutype, cbase);
3032       var = fold_convert (uutype, var);
3033     }
3034
3035   if (!constant_multiple_of (ustep, cstep, &rat))
3036     return false;
3037
3038   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3039      type, we achieve better folding by computing their difference in this
3040      wider type, and cast the result to UUTYPE.  We do not need to worry about
3041      overflows, as all the arithmetics will in the end be performed in UUTYPE
3042      anyway.  */
3043   common_type = determine_common_wider_type (&ubase, &cbase);
3044
3045   /* use = ubase - ratio * cbase + ratio * var.  */
3046   tree_to_aff_combination (ubase, common_type, aff);
3047   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3048   tree_to_aff_combination (var, uutype, &var_aff);
3049
3050   /* We need to shift the value if we are after the increment.  */
3051   if (stmt_after_increment (loop, cand, at))
3052     {
3053       aff_tree cstep_aff;
3054
3055       if (common_type != uutype)
3056         cstep_common = fold_convert (common_type, cstep);
3057       else
3058         cstep_common = cstep;
3059
3060       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3061       aff_combination_add (&cbase_aff, &cstep_aff);
3062     }
3063
3064   aff_combination_scale (&cbase_aff, -rat);
3065   aff_combination_add (aff, &cbase_aff);
3066   if (common_type != uutype)
3067     aff_combination_convert (aff, uutype);
3068
3069   aff_combination_scale (&var_aff, rat);
3070   aff_combination_add (aff, &var_aff);
3071
3072   return true;
3073 }
3074
3075 /* Return the type of USE.  */
3076
3077 static tree
3078 get_use_type (struct iv_use *use)
3079 {
3080   tree base_type = TREE_TYPE (use->iv->base);
3081   tree type;
3082
3083   if (use->type == USE_ADDRESS)
3084     {
3085       /* The base_type may be a void pointer.  Create a pointer type based on
3086          the mem_ref instead.  */
3087       type = build_pointer_type (TREE_TYPE (*use->op_p));
3088       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3089                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3090     }
3091   else
3092     type = base_type;
3093
3094   return type;
3095 }
3096
3097 /* Determines the expression by that USE is expressed from induction variable
3098    CAND at statement AT in LOOP.  The computation is unshared.  */
3099
3100 static tree
3101 get_computation_at (struct loop *loop,
3102                     struct iv_use *use, struct iv_cand *cand, gimple at)
3103 {
3104   aff_tree aff;
3105   tree type = get_use_type (use);
3106
3107   if (!get_computation_aff (loop, use, cand, at, &aff))
3108     return NULL_TREE;
3109   unshare_aff_combination (&aff);
3110   return fold_convert (type, aff_combination_to_tree (&aff));
3111 }
3112
3113 /* Determines the expression by that USE is expressed from induction variable
3114    CAND in LOOP.  The computation is unshared.  */
3115
3116 static tree
3117 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3118 {
3119   return get_computation_at (loop, use, cand, use->stmt);
3120 }
3121
3122 /* Adjust the cost COST for being in loop setup rather than loop body.
3123    If we're optimizing for space, the loop setup overhead is constant;
3124    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3125 static unsigned
3126 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3127 {
3128   if (cost == INFTY)
3129     return cost;
3130   else if (optimize_loop_for_speed_p (data->current_loop))
3131     return cost / avg_loop_niter (data->current_loop);
3132   else
3133     return cost;
3134 }
3135
3136 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3137    validity for a memory reference accessing memory of mode MODE in
3138    address space AS.  */
3139
3140
3141 bool
3142 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3143                                  addr_space_t as)
3144 {
3145 #define MAX_RATIO 128
3146   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3147   static vec<sbitmap> valid_mult_list;
3148   sbitmap valid_mult;
3149
3150   if (data_index >= valid_mult_list.length ())
3151     valid_mult_list.safe_grow_cleared (data_index + 1);
3152
3153   valid_mult = valid_mult_list[data_index];
3154   if (!valid_mult)
3155     {
3156       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3157       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3158       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3159       rtx addr, scaled;
3160       HOST_WIDE_INT i;
3161
3162       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3163       bitmap_clear (valid_mult);
3164       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3165       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3166       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3167         {
3168           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3169           if (memory_address_addr_space_p (mode, addr, as)
3170               || memory_address_addr_space_p (mode, scaled, as))
3171             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3172         }
3173
3174       if (dump_file && (dump_flags & TDF_DETAILS))
3175         {
3176           fprintf (dump_file, "  allowed multipliers:");
3177           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3178             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3179               fprintf (dump_file, " %d", (int) i);
3180           fprintf (dump_file, "\n");
3181           fprintf (dump_file, "\n");
3182         }
3183
3184       valid_mult_list[data_index] = valid_mult;
3185     }
3186
3187   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3188     return false;
3189
3190   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3191 }
3192
3193 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3194    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3195    variable is omitted.  Compute the cost for a memory reference that accesses
3196    a memory location of mode MEM_MODE in address space AS.
3197
3198    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3199    size of MEM_MODE / RATIO) is available.  To make this determination, we
3200    look at the size of the increment to be made, which is given in CSTEP.
3201    CSTEP may be zero if the step is unknown.
3202    STMT_AFTER_INC is true iff the statement we're looking at is after the
3203    increment of the original biv.
3204
3205    TODO -- there must be some better way.  This all is quite crude.  */
3206
3207 typedef struct address_cost_data_s
3208 {
3209   HOST_WIDE_INT min_offset, max_offset;
3210   unsigned costs[2][2][2][2];
3211 } *address_cost_data;
3212
3213
3214 static comp_cost
3215 get_address_cost (bool symbol_present, bool var_present,
3216                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3217                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3218                   addr_space_t as, bool speed,
3219                   bool stmt_after_inc, bool *may_autoinc)
3220 {
3221   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3222   static vec<address_cost_data> address_cost_data_list;
3223   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3224   address_cost_data data;
3225   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3226   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3227   unsigned cost, acost, complexity;
3228   bool offset_p, ratio_p, autoinc;
3229   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3230   unsigned HOST_WIDE_INT mask;
3231   unsigned bits;
3232
3233   if (data_index >= address_cost_data_list.length ())
3234     address_cost_data_list.safe_grow_cleared (data_index + 1);
3235
3236   data = address_cost_data_list[data_index];
3237   if (!data)
3238     {
3239       HOST_WIDE_INT i;
3240       HOST_WIDE_INT rat, off = 0;
3241       int old_cse_not_expected, width;
3242       unsigned sym_p, var_p, off_p, rat_p, add_c;
3243       rtx seq, addr, base;
3244       rtx reg0, reg1;
3245
3246       data = (address_cost_data) xcalloc (1, sizeof (*data));
3247
3248       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3249
3250       width = GET_MODE_BITSIZE (address_mode) - 1;
3251       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3252         width = HOST_BITS_PER_WIDE_INT - 1;
3253       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3254
3255       for (i = width; i >= 0; i--)
3256         {
3257           off = -((unsigned HOST_WIDE_INT) 1 << i);
3258           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3259           if (memory_address_addr_space_p (mem_mode, addr, as))
3260             break;
3261         }
3262       data->min_offset = (i == -1? 0 : off);
3263
3264       for (i = width; i >= 0; i--)
3265         {
3266           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3267           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3268           if (memory_address_addr_space_p (mem_mode, addr, as))
3269             break;
3270         }
3271       if (i == -1)
3272         off = 0;
3273       data->max_offset = off;
3274
3275       if (dump_file && (dump_flags & TDF_DETAILS))
3276         {
3277           fprintf (dump_file, "get_address_cost:\n");
3278           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3279                    GET_MODE_NAME (mem_mode),
3280                    data->min_offset);
3281           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3282                    GET_MODE_NAME (mem_mode),
3283                    data->max_offset);
3284         }
3285
3286       rat = 1;
3287       for (i = 2; i <= MAX_RATIO; i++)
3288         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3289           {
3290             rat = i;
3291             break;
3292           }
3293
3294       /* Compute the cost of various addressing modes.  */
3295       acost = 0;
3296       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3297       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3298
3299       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3300           || USE_STORE_PRE_DECREMENT (mem_mode))
3301         {
3302           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3303           has_predec[mem_mode]
3304             = memory_address_addr_space_p (mem_mode, addr, as);
3305         }
3306       if (USE_LOAD_POST_DECREMENT (mem_mode)
3307           || USE_STORE_POST_DECREMENT (mem_mode))
3308         {
3309           addr = gen_rtx_POST_DEC (address_mode, reg0);
3310           has_postdec[mem_mode]
3311             = memory_address_addr_space_p (mem_mode, addr, as);
3312         }
3313       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3314           || USE_STORE_PRE_DECREMENT (mem_mode))
3315         {
3316           addr = gen_rtx_PRE_INC (address_mode, reg0);
3317           has_preinc[mem_mode]
3318             = memory_address_addr_space_p (mem_mode, addr, as);
3319         }
3320       if (USE_LOAD_POST_INCREMENT (mem_mode)
3321           || USE_STORE_POST_INCREMENT (mem_mode))
3322         {
3323           addr = gen_rtx_POST_INC (address_mode, reg0);
3324           has_postinc[mem_mode]
3325             = memory_address_addr_space_p (mem_mode, addr, as);
3326         }
3327       for (i = 0; i < 16; i++)
3328         {
3329           sym_p = i & 1;
3330           var_p = (i >> 1) & 1;
3331           off_p = (i >> 2) & 1;
3332           rat_p = (i >> 3) & 1;
3333
3334           addr = reg0;
3335           if (rat_p)
3336             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3337                                    gen_int_mode (rat, address_mode));
3338
3339           if (var_p)
3340             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3341
3342           if (sym_p)
3343             {
3344               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3345               /* ??? We can run into trouble with some backends by presenting
3346                  it with symbols which haven't been properly passed through
3347                  targetm.encode_section_info.  By setting the local bit, we
3348                  enhance the probability of things working.  */
3349               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3350
3351               if (off_p)
3352                 base = gen_rtx_fmt_e (CONST, address_mode,
3353                                       gen_rtx_fmt_ee
3354                                         (PLUS, address_mode, base,
3355                                          gen_int_mode (off, address_mode)));
3356             }
3357           else if (off_p)
3358             base = gen_int_mode (off, address_mode);
3359           else
3360             base = NULL_RTX;
3361
3362           if (base)
3363             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3364
3365           start_sequence ();
3366           /* To avoid splitting addressing modes, pretend that no cse will
3367              follow.  */
3368           old_cse_not_expected = cse_not_expected;
3369           cse_not_expected = true;
3370           addr = memory_address_addr_space (mem_mode, addr, as);
3371           cse_not_expected = old_cse_not_expected;
3372           seq = get_insns ();
3373           end_sequence ();
3374
3375           acost = seq_cost (seq, speed);
3376           acost += address_cost (addr, mem_mode, as, speed);
3377
3378           if (!acost)
3379             acost = 1;
3380           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3381         }
3382
3383       /* On some targets, it is quite expensive to load symbol to a register,
3384          which makes addresses that contain symbols look much more expensive.
3385          However, the symbol will have to be loaded in any case before the
3386          loop (and quite likely we have it in register already), so it does not
3387          make much sense to penalize them too heavily.  So make some final
3388          tweaks for the SYMBOL_PRESENT modes:
3389
3390          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391          var is cheaper, use this mode with small penalty.
3392          If VAR_PRESENT is true, try whether the mode with
3393          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394          if this is the case, use it.  */
3395       add_c = add_cost (speed, address_mode);
3396       for (i = 0; i < 8; i++)
3397         {
3398           var_p = i & 1;
3399           off_p = (i >> 1) & 1;
3400           rat_p = (i >> 2) & 1;
3401
3402           acost = data->costs[0][1][off_p][rat_p] + 1;
3403           if (var_p)
3404             acost += add_c;
3405
3406           if (acost < data->costs[1][var_p][off_p][rat_p])
3407             data->costs[1][var_p][off_p][rat_p] = acost;
3408         }
3409
3410       if (dump_file && (dump_flags & TDF_DETAILS))
3411         {
3412           fprintf (dump_file, "Address costs:\n");
3413
3414           for (i = 0; i < 16; i++)
3415             {
3416               sym_p = i & 1;
3417               var_p = (i >> 1) & 1;
3418               off_p = (i >> 2) & 1;
3419               rat_p = (i >> 3) & 1;
3420
3421               fprintf (dump_file, "  ");
3422               if (sym_p)
3423                 fprintf (dump_file, "sym + ");
3424               if (var_p)
3425                 fprintf (dump_file, "var + ");
3426               if (off_p)
3427                 fprintf (dump_file, "cst + ");
3428               if (rat_p)
3429                 fprintf (dump_file, "rat * ");
3430
3431               acost = data->costs[sym_p][var_p][off_p][rat_p];
3432               fprintf (dump_file, "index costs %d\n", acost);
3433             }
3434           if (has_predec[mem_mode] || has_postdec[mem_mode]
3435               || has_preinc[mem_mode] || has_postinc[mem_mode])
3436             fprintf (dump_file, "  May include autoinc/dec\n");
3437           fprintf (dump_file, "\n");
3438         }
3439
3440       address_cost_data_list[data_index] = data;
3441     }
3442
3443   bits = GET_MODE_BITSIZE (address_mode);
3444   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3445   offset &= mask;
3446   if ((offset >> (bits - 1) & 1))
3447     offset |= ~mask;
3448   s_offset = offset;
3449
3450   autoinc = false;
3451   msize = GET_MODE_SIZE (mem_mode);
3452   autoinc_offset = offset;
3453   if (stmt_after_inc)
3454     autoinc_offset += ratio * cstep;
3455   if (symbol_present || var_present || ratio != 1)
3456     autoinc = false;
3457   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3458                && msize == cstep)
3459            || (has_postdec[mem_mode] && autoinc_offset == 0
3460                && msize == -cstep)
3461            || (has_preinc[mem_mode] && autoinc_offset == msize
3462                && msize == cstep)
3463            || (has_predec[mem_mode] && autoinc_offset == -msize
3464                && msize == -cstep))
3465     autoinc = true;
3466
3467   cost = 0;
3468   offset_p = (s_offset != 0
3469               && data->min_offset <= s_offset
3470               && s_offset <= data->max_offset);
3471   ratio_p = (ratio != 1
3472              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3473
3474   if (ratio != 1 && !ratio_p)
3475     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3476
3477   if (s_offset && !offset_p && !symbol_present)
3478     cost += add_cost (speed, address_mode);
3479
3480   if (may_autoinc)
3481     *may_autoinc = autoinc;
3482   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3483   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3484   return new_cost (cost + acost, complexity);
3485 }
3486
3487  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3488     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3489     calculating the operands of EXPR.  Returns true if successful, and returns
3490     the cost in COST.  */
3491
3492 static bool
3493 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3494                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3495 {
3496   comp_cost res;
3497   tree op1 = TREE_OPERAND (expr, 1);
3498   tree cst = TREE_OPERAND (mult, 1);
3499   tree multop = TREE_OPERAND (mult, 0);
3500   int m = exact_log2 (int_cst_value (cst));
3501   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3502   int sa_cost;
3503   bool equal_p = false;
3504
3505   if (!(m >= 0 && m < maxm))
3506     return false;
3507
3508   if (operand_equal_p (op1, mult, 0))
3509     equal_p = true;
3510
3511   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3512              ? shiftadd_cost (speed, mode, m)
3513              : (equal_p
3514                 ? shiftsub1_cost (speed, mode, m)
3515                 : shiftsub0_cost (speed, mode, m)));
3516   res = new_cost (sa_cost, 0);
3517   res = add_costs (res, equal_p ? cost0 : cost1);
3518
3519   STRIP_NOPS (multop);
3520   if (!is_gimple_val (multop))
3521     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3522
3523   *cost = res;
3524   return true;
3525 }
3526
3527 /* Estimates cost of forcing expression EXPR into a variable.  */
3528
3529 static comp_cost
3530 force_expr_to_var_cost (tree expr, bool speed)
3531 {
3532   static bool costs_initialized = false;
3533   static unsigned integer_cost [2];
3534   static unsigned symbol_cost [2];
3535   static unsigned address_cost [2];
3536   tree op0, op1;
3537   comp_cost cost0, cost1, cost;
3538   enum machine_mode mode;
3539
3540   if (!costs_initialized)
3541     {
3542       tree type = build_pointer_type (integer_type_node);
3543       tree var, addr;
3544       rtx x;
3545       int i;
3546
3547       var = create_tmp_var_raw (integer_type_node, "test_var");
3548       TREE_STATIC (var) = 1;
3549       x = produce_memory_decl_rtl (var, NULL);
3550       SET_DECL_RTL (var, x);
3551
3552       addr = build1 (ADDR_EXPR, type, var);
3553
3554
3555       for (i = 0; i < 2; i++)
3556         {
3557           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3558                                                              2000), i);
3559
3560           symbol_cost[i] = computation_cost (addr, i) + 1;
3561
3562           address_cost[i]
3563             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3564           if (dump_file && (dump_flags & TDF_DETAILS))
3565             {
3566               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3567               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3568               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3569               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3570               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3571               fprintf (dump_file, "\n");
3572             }
3573         }
3574
3575       costs_initialized = true;
3576     }
3577
3578   STRIP_NOPS (expr);
3579
3580   if (SSA_VAR_P (expr))
3581     return no_cost;
3582
3583   if (is_gimple_min_invariant (expr))
3584     {
3585       if (TREE_CODE (expr) == INTEGER_CST)
3586         return new_cost (integer_cost [speed], 0);
3587
3588       if (TREE_CODE (expr) == ADDR_EXPR)
3589         {
3590           tree obj = TREE_OPERAND (expr, 0);
3591
3592           if (TREE_CODE (obj) == VAR_DECL
3593               || TREE_CODE (obj) == PARM_DECL
3594               || TREE_CODE (obj) == RESULT_DECL)
3595             return new_cost (symbol_cost [speed], 0);
3596         }
3597
3598       return new_cost (address_cost [speed], 0);
3599     }
3600
3601   switch (TREE_CODE (expr))
3602     {
3603     case POINTER_PLUS_EXPR:
3604     case PLUS_EXPR:
3605     case MINUS_EXPR:
3606     case MULT_EXPR:
3607       op0 = TREE_OPERAND (expr, 0);
3608       op1 = TREE_OPERAND (expr, 1);
3609       STRIP_NOPS (op0);
3610       STRIP_NOPS (op1);
3611       break;
3612
3613     CASE_CONVERT:
3614     case NEGATE_EXPR:
3615       op0 = TREE_OPERAND (expr, 0);
3616       STRIP_NOPS (op0);
3617       op1 = NULL_TREE;
3618       break;
3619
3620     default:
3621       /* Just an arbitrary value, FIXME.  */
3622       return new_cost (target_spill_cost[speed], 0);
3623     }
3624
3625   if (op0 == NULL_TREE
3626       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3627     cost0 = no_cost;
3628   else
3629     cost0 = force_expr_to_var_cost (op0, speed);
3630
3631   if (op1 == NULL_TREE
3632       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3633     cost1 = no_cost;
3634   else
3635     cost1 = force_expr_to_var_cost (op1, speed);
3636
3637   mode = TYPE_MODE (TREE_TYPE (expr));
3638   switch (TREE_CODE (expr))
3639     {
3640     case POINTER_PLUS_EXPR:
3641     case PLUS_EXPR:
3642     case MINUS_EXPR:
3643     case NEGATE_EXPR:
3644       cost = new_cost (add_cost (speed, mode), 0);
3645       if (TREE_CODE (expr) != NEGATE_EXPR)
3646         {
3647           tree mult = NULL_TREE;
3648           comp_cost sa_cost;
3649           if (TREE_CODE (op1) == MULT_EXPR)
3650             mult = op1;
3651           else if (TREE_CODE (op0) == MULT_EXPR)
3652             mult = op0;
3653
3654           if (mult != NULL_TREE
3655               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3656               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3657                                     speed, &sa_cost))
3658             return sa_cost;
3659         }
3660       break;
3661
3662     CASE_CONVERT:
3663       {
3664         tree inner_mode, outer_mode;
3665         outer_mode = TREE_TYPE (expr);
3666         inner_mode = TREE_TYPE (op0);
3667         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3668                                        TYPE_MODE (inner_mode), speed), 0);
3669       }
3670       break;
3671
3672     case MULT_EXPR:
3673       if (cst_and_fits_in_hwi (op0))
3674         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3675                                              mode, speed), 0);
3676       else if (cst_and_fits_in_hwi (op1))
3677         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3678                                              mode, speed), 0);
3679       else
3680         return new_cost (target_spill_cost [speed], 0);
3681       break;
3682
3683     default:
3684       gcc_unreachable ();
3685     }
3686
3687   cost = add_costs (cost, cost0);
3688   cost = add_costs (cost, cost1);
3689
3690   /* Bound the cost by target_spill_cost.  The parts of complicated
3691      computations often are either loop invariant or at least can
3692      be shared between several iv uses, so letting this grow without
3693      limits would not give reasonable results.  */
3694   if (cost.cost > (int) target_spill_cost [speed])
3695     cost.cost = target_spill_cost [speed];
3696
3697   return cost;
3698 }
3699
3700 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3701    invariants the computation depends on.  */
3702
3703 static comp_cost
3704 force_var_cost (struct ivopts_data *data,
3705                 tree expr, bitmap *depends_on)
3706 {
3707   if (depends_on)
3708     {
3709       fd_ivopts_data = data;
3710       walk_tree (&expr, find_depends, depends_on, NULL);
3711     }
3712
3713   return force_expr_to_var_cost (expr, data->speed);
3714 }
3715
3716 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3717    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3718    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3719    invariants the computation depends on.  */
3720
3721 static comp_cost
3722 split_address_cost (struct ivopts_data *data,
3723                     tree addr, bool *symbol_present, bool *var_present,
3724                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3725 {
3726   tree core;
3727   HOST_WIDE_INT bitsize;
3728   HOST_WIDE_INT bitpos;
3729   tree toffset;
3730   enum machine_mode mode;
3731   int unsignedp, volatilep;
3732
3733   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3734                               &unsignedp, &volatilep, false);
3735
3736   if (toffset != 0
3737       || bitpos % BITS_PER_UNIT != 0
3738       || TREE_CODE (core) != VAR_DECL)
3739     {
3740       *symbol_present = false;
3741       *var_present = true;
3742       fd_ivopts_data = data;
3743       walk_tree (&addr, find_depends, depends_on, NULL);
3744       return new_cost (target_spill_cost[data->speed], 0);
3745     }
3746
3747   *offset += bitpos / BITS_PER_UNIT;
3748   if (TREE_STATIC (core)
3749       || DECL_EXTERNAL (core))
3750     {
3751       *symbol_present = true;
3752       *var_present = false;
3753       return no_cost;
3754     }
3755
3756   *symbol_present = false;
3757   *var_present = true;
3758   return no_cost;
3759 }
3760
3761 /* Estimates cost of expressing difference of addresses E1 - E2 as
3762    var + symbol + offset.  The value of offset is added to OFFSET,
3763    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3764    part is missing.  DEPENDS_ON is a set of the invariants the computation
3765    depends on.  */
3766
3767 static comp_cost
3768 ptr_difference_cost (struct ivopts_data *data,
3769                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3770                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3771 {
3772   HOST_WIDE_INT diff = 0;
3773   aff_tree aff_e1, aff_e2;
3774   tree type;
3775
3776   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3777
3778   if (ptr_difference_const (e1, e2, &diff))
3779     {
3780       *offset += diff;
3781       *symbol_present = false;
3782       *var_present = false;
3783       return no_cost;
3784     }
3785
3786   if (integer_zerop (e2))
3787     return split_address_cost (data, TREE_OPERAND (e1, 0),
3788                                symbol_present, var_present, offset, depends_on);
3789
3790   *symbol_present = false;
3791   *var_present = true;
3792
3793   type = signed_type_for (TREE_TYPE (e1));
3794   tree_to_aff_combination (e1, type, &aff_e1);
3795   tree_to_aff_combination (e2, type, &aff_e2);
3796   aff_combination_scale (&aff_e2, double_int_minus_one);
3797   aff_combination_add (&aff_e1, &aff_e2);
3798
3799   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3800 }
3801
3802 /* Estimates cost of expressing difference E1 - E2 as
3803    var + symbol + offset.  The value of offset is added to OFFSET,
3804    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3805    part is missing.  DEPENDS_ON is a set of the invariants the computation
3806    depends on.  */
3807
3808 static comp_cost
3809 difference_cost (struct ivopts_data *data,
3810                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3811                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3812 {
3813   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3814   unsigned HOST_WIDE_INT off1, off2;
3815   aff_tree aff_e1, aff_e2;
3816   tree type;
3817
3818   e1 = strip_offset (e1, &off1);
3819   e2 = strip_offset (e2, &off2);
3820   *offset += off1 - off2;
3821
3822   STRIP_NOPS (e1);
3823   STRIP_NOPS (e2);
3824
3825   if (TREE_CODE (e1) == ADDR_EXPR)
3826     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3827                                 offset, depends_on);
3828   *symbol_present = false;
3829
3830   if (operand_equal_p (e1, e2, 0))
3831     {
3832       *var_present = false;
3833       return no_cost;
3834     }
3835
3836   *var_present = true;
3837
3838   if (integer_zerop (e2))
3839     return force_var_cost (data, e1, depends_on);
3840
3841   if (integer_zerop (e1))
3842     {
3843       comp_cost cost = force_var_cost (data, e2, depends_on);
3844       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3845       return cost;
3846     }
3847
3848   type = signed_type_for (TREE_TYPE (e1));
3849   tree_to_aff_combination (e1, type, &aff_e1);
3850   tree_to_aff_combination (e2, type, &aff_e2);
3851   aff_combination_scale (&aff_e2, double_int_minus_one);
3852   aff_combination_add (&aff_e1, &aff_e2);
3853
3854   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3855 }
3856
3857 /* Returns true if AFF1 and AFF2 are identical.  */
3858
3859 static bool
3860 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3861 {
3862   unsigned i;
3863
3864   if (aff1->n != aff2->n)
3865     return false;
3866
3867   for (i = 0; i < aff1->n; i++)
3868     {
3869       if (aff1->elts[i].coef != aff2->elts[i].coef)
3870         return false;
3871
3872       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3873         return false;
3874     }
3875   return true;
3876 }
3877
3878 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3879
3880 static int
3881 get_expr_id (struct ivopts_data *data, tree expr)
3882 {
3883   struct iv_inv_expr_ent ent;
3884   struct iv_inv_expr_ent **slot;
3885
3886   ent.expr = expr;
3887   ent.hash = iterative_hash_expr (expr, 0);
3888   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3889   if (*slot)
3890     return (*slot)->id;
3891
3892   *slot = XNEW (struct iv_inv_expr_ent);
3893   (*slot)->expr = expr;
3894   (*slot)->hash = ent.hash;
3895   (*slot)->id = data->inv_expr_id++;
3896   return (*slot)->id;
3897 }
3898
3899 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3900    requires a new compiler generated temporary.  Returns -1 otherwise.
3901    ADDRESS_P is a flag indicating if the expression is for address
3902    computation.  */
3903
3904 static int
3905 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3906                             tree cbase, HOST_WIDE_INT ratio,
3907                             bool address_p)
3908 {
3909   aff_tree ubase_aff, cbase_aff;
3910   tree expr, ub, cb;
3911
3912   STRIP_NOPS (ubase);
3913   STRIP_NOPS (cbase);
3914   ub = ubase;
3915   cb = cbase;
3916
3917   if ((TREE_CODE (ubase) == INTEGER_CST)
3918       && (TREE_CODE (cbase) == INTEGER_CST))
3919     return -1;
3920
3921   /* Strips the constant part. */
3922   if (TREE_CODE (ubase) == PLUS_EXPR
3923       || TREE_CODE (ubase) == MINUS_EXPR
3924       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3925     {
3926       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3927         ubase = TREE_OPERAND (ubase, 0);
3928     }
3929
3930   /* Strips the constant part. */
3931   if (TREE_CODE (cbase) == PLUS_EXPR
3932       || TREE_CODE (cbase) == MINUS_EXPR
3933       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3934     {
3935       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3936         cbase = TREE_OPERAND (cbase, 0);
3937     }
3938
3939   if (address_p)
3940     {
3941       if (((TREE_CODE (ubase) == SSA_NAME)
3942            || (TREE_CODE (ubase) == ADDR_EXPR
3943                && is_gimple_min_invariant (ubase)))
3944           && (TREE_CODE (cbase) == INTEGER_CST))
3945         return -1;
3946
3947       if (((TREE_CODE (cbase) == SSA_NAME)
3948            || (TREE_CODE (cbase) == ADDR_EXPR
3949                && is_gimple_min_invariant (cbase)))
3950           && (TREE_CODE (ubase) == INTEGER_CST))
3951         return -1;
3952     }
3953
3954   if (ratio == 1)
3955     {
3956       if (operand_equal_p (ubase, cbase, 0))
3957         return -1;
3958
3959       if (TREE_CODE (ubase) == ADDR_EXPR
3960           && TREE_CODE (cbase) == ADDR_EXPR)
3961         {
3962           tree usym, csym;
3963
3964           usym = TREE_OPERAND (ubase, 0);
3965           csym = TREE_OPERAND (cbase, 0);
3966           if (TREE_CODE (usym) == ARRAY_REF)
3967             {
3968               tree ind = TREE_OPERAND (usym, 1);
3969               if (TREE_CODE (ind) == INTEGER_CST
3970                   && host_integerp (ind, 0)
3971                   && TREE_INT_CST_LOW (ind) == 0)
3972                 usym = TREE_OPERAND (usym, 0);
3973             }
3974           if (TREE_CODE (csym) == ARRAY_REF)
3975             {
3976               tree ind = TREE_OPERAND (csym, 1);
3977               if (TREE_CODE (ind) == INTEGER_CST
3978                   && host_integerp (ind, 0)
3979                   && TREE_INT_CST_LOW (ind) == 0)
3980                 csym = TREE_OPERAND (csym, 0);
3981             }
3982           if (operand_equal_p (usym, csym, 0))
3983             return -1;
3984         }
3985       /* Now do more complex comparison  */
3986       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3987       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3988       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3989         return -1;
3990     }
3991
3992   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3993   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3994
3995   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3996   aff_combination_add (&ubase_aff, &cbase_aff);
3997   expr = aff_combination_to_tree (&ubase_aff);
3998   return get_expr_id (data, expr);
3999 }
4000
4001
4002
4003 /* Determines the cost of the computation by that USE is expressed
4004    from induction variable CAND.  If ADDRESS_P is true, we just need
4005    to create an address from it, otherwise we want to get it into
4006    register.  A set of invariants we depend on is stored in
4007    DEPENDS_ON.  AT is the statement at that the value is computed.
4008    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4009    addressing is likely.  */
4010
4011 static comp_cost
4012 get_computation_cost_at (struct ivopts_data *data,
4013                          struct iv_use *use, struct iv_cand *cand,
4014                          bool address_p, bitmap *depends_on, gimple at,
4015                          bool *can_autoinc,
4016                          int *inv_expr_id)
4017 {
4018   tree ubase = use->iv->base, ustep = use->iv->step;
4019   tree cbase, cstep;
4020   tree utype = TREE_TYPE (ubase), ctype;
4021   unsigned HOST_WIDE_INT cstepi, offset = 0;
4022   HOST_WIDE_INT ratio, aratio;
4023   bool var_present, symbol_present, stmt_is_after_inc;
4024   comp_cost cost;
4025   double_int rat;
4026   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4027   enum machine_mode mem_mode = (address_p
4028                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4029                                 : VOIDmode);
4030
4031   *depends_on = NULL;
4032
4033   /* Only consider real candidates.  */
4034   if (!cand->iv)
4035     return infinite_cost;
4036
4037   cbase = cand->iv->base;
4038   cstep = cand->iv->step;
4039   ctype = TREE_TYPE (cbase);
4040
4041   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4042     {
4043       /* We do not have a precision to express the values of use.  */
4044       return infinite_cost;
4045     }
4046
4047   if (address_p
4048       || (use->iv->base_object
4049           && cand->iv->base_object
4050           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4051           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4052     {
4053       /* Do not try to express address of an object with computation based
4054          on address of a different object.  This may cause problems in rtl
4055          level alias analysis (that does not expect this to be happening,
4056          as this is illegal in C), and would be unlikely to be useful
4057          anyway.  */
4058       if (use->iv->base_object
4059           && cand->iv->base_object
4060           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4061         return infinite_cost;
4062     }
4063
4064   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4065     {
4066       /* TODO -- add direct handling of this case.  */
4067       goto fallback;
4068     }
4069
4070   /* CSTEPI is removed from the offset in case statement is after the
4071      increment.  If the step is not constant, we use zero instead.
4072      This is a bit imprecise (there is the extra addition), but
4073      redundancy elimination is likely to transform the code so that
4074      it uses value of the variable before increment anyway,
4075      so it is not that much unrealistic.  */
4076   if (cst_and_fits_in_hwi (cstep))
4077     cstepi = int_cst_value (cstep);
4078   else
4079     cstepi = 0;
4080
4081   if (!constant_multiple_of (ustep, cstep, &rat))
4082     return infinite_cost;
4083
4084   if (rat.fits_shwi ())
4085     ratio = rat.to_shwi ();
4086   else
4087     return infinite_cost;
4088
4089   STRIP_NOPS (cbase);
4090   ctype = TREE_TYPE (cbase);
4091
4092   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4093
4094   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4095      or ratio == 1, it is better to handle this like
4096
4097      ubase - ratio * cbase + ratio * var
4098
4099      (also holds in the case ratio == -1, TODO.  */
4100
4101   if (cst_and_fits_in_hwi (cbase))
4102     {
4103       offset = - ratio * int_cst_value (cbase);
4104       cost = difference_cost (data,
4105                               ubase, build_int_cst (utype, 0),
4106                               &symbol_present, &var_present, &offset,
4107                               depends_on);
4108       cost.cost /= avg_loop_niter (data->current_loop);
4109     }
4110   else if (ratio == 1)
4111     {
4112       tree real_cbase = cbase;
4113
4114       /* Check to see if any adjustment is needed.  */
4115       if (cstepi == 0 && stmt_is_after_inc)
4116         {
4117           aff_tree real_cbase_aff;
4118           aff_tree cstep_aff;
4119
4120           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4121                                    &real_cbase_aff);
4122           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4123
4124           aff_combination_add (&real_cbase_aff, &cstep_aff);
4125           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4126         }
4127
4128       cost = difference_cost (data,
4129                               ubase, real_cbase,
4130                               &symbol_present, &var_present, &offset,
4131                               depends_on);
4132       cost.cost /= avg_loop_niter (data->current_loop);
4133     }
4134   else if (address_p
4135            && !POINTER_TYPE_P (ctype)
4136            && multiplier_allowed_in_address_p
4137                 (ratio, mem_mode,
4138                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4139     {
4140       cbase
4141         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4142       cost = difference_cost (data,
4143                               ubase, cbase,
4144                               &symbol_present, &var_present, &offset,
4145                               depends_on);
4146       cost.cost /= avg_loop_niter (data->current_loop);
4147     }
4148   else
4149     {
4150       cost = force_var_cost (data, cbase, depends_on);
4151       cost = add_costs (cost,
4152                         difference_cost (data,
4153                                          ubase, build_int_cst (utype, 0),
4154                                          &symbol_present, &var_present,
4155                                          &offset, depends_on));
4156       cost.cost /= avg_loop_niter (data->current_loop);
4157       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4158     }
4159
4160   if (inv_expr_id)
4161     {
4162       *inv_expr_id =
4163           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4164       /* Clear depends on.  */
4165       if (*inv_expr_id != -1 && depends_on && *depends_on)
4166         bitmap_clear (*depends_on);
4167     }
4168
4169   /* If we are after the increment, the value of the candidate is higher by
4170      one iteration.  */
4171   if (stmt_is_after_inc)
4172     offset -= ratio * cstepi;
4173
4174   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4175      (symbol/var1/const parts may be omitted).  If we are looking for an
4176      address, find the cost of addressing this.  */
4177   if (address_p)
4178     return add_costs (cost,
4179                       get_address_cost (symbol_present, var_present,
4180                                         offset, ratio, cstepi,
4181                                         mem_mode,
4182                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4183                                         speed, stmt_is_after_inc,
4184                                         can_autoinc));
4185
4186   /* Otherwise estimate the costs for computing the expression.  */
4187   if (!symbol_present && !var_present && !offset)
4188     {
4189       if (ratio != 1)
4190         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4191       return cost;
4192     }
4193
4194   /* Symbol + offset should be compile-time computable so consider that they
4195       are added once to the variable, if present.  */
4196   if (var_present && (symbol_present || offset))
4197     cost.cost += adjust_setup_cost (data,
4198                                     add_cost (speed, TYPE_MODE (ctype)));
4199
4200   /* Having offset does not affect runtime cost in case it is added to
4201      symbol, but it increases complexity.  */
4202   if (offset)
4203     cost.complexity++;
4204
4205   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4206
4207   aratio = ratio > 0 ? ratio : -ratio;
4208   if (aratio != 1)
4209     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4210   return cost;
4211
4212 fallback:
4213   if (can_autoinc)
4214     *can_autoinc = false;
4215
4216   {
4217     /* Just get the expression, expand it and measure the cost.  */
4218     tree comp = get_computation_at (data->current_loop, use, cand, at);
4219
4220     if (!comp)
4221       return infinite_cost;
4222
4223     if (address_p)
4224       comp = build_simple_mem_ref (comp);
4225
4226     return new_cost (computation_cost (comp, speed), 0);
4227   }
4228 }
4229
4230 /* Determines the cost of the computation by that USE is expressed
4231    from induction variable CAND.  If ADDRESS_P is true, we just need
4232    to create an address from it, otherwise we want to get it into
4233    register.  A set of invariants we depend on is stored in
4234    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4235    autoinc addressing is likely.  */
4236
4237 static comp_cost
4238 get_computation_cost (struct ivopts_data *data,
4239                       struct iv_use *use, struct iv_cand *cand,
4240                       bool address_p, bitmap *depends_on,
4241                       bool *can_autoinc, int *inv_expr_id)
4242 {
4243   return get_computation_cost_at (data,
4244                                   use, cand, address_p, depends_on, use->stmt,
4245                                   can_autoinc, inv_expr_id);
4246 }
4247
4248 /* Determines cost of basing replacement of USE on CAND in a generic
4249    expression.  */
4250
4251 static bool
4252 determine_use_iv_cost_generic (struct ivopts_data *data,
4253                                struct iv_use *use, struct iv_cand *cand)
4254 {
4255   bitmap depends_on;
4256   comp_cost cost;
4257   int inv_expr_id = -1;
4258
4259   /* The simple case first -- if we need to express value of the preserved
4260      original biv, the cost is 0.  This also prevents us from counting the
4261      cost of increment twice -- once at this use and once in the cost of
4262      the candidate.  */
4263   if (cand->pos == IP_ORIGINAL
4264       && cand->incremented_at == use->stmt)
4265     {
4266       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4267                        ERROR_MARK, -1);
4268       return true;
4269     }
4270
4271   cost = get_computation_cost (data, use, cand, false, &depends_on,
4272                                NULL, &inv_expr_id);
4273
4274   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4275                    inv_expr_id);
4276
4277   return !infinite_cost_p (cost);
4278 }
4279
4280 /* Determines cost of basing replacement of USE on CAND in an address.  */
4281
4282 static bool
4283 determine_use_iv_cost_address (struct ivopts_data *data,
4284                                struct iv_use *use, struct iv_cand *cand)
4285 {
4286   bitmap depends_on;
4287   bool can_autoinc;
4288   int inv_expr_id = -1;
4289   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4290                                          &can_autoinc, &inv_expr_id);
4291
4292   if (cand->ainc_use == use)
4293     {
4294       if (can_autoinc)
4295         cost.cost -= cand->cost_step;
4296       /* If we generated the candidate solely for exploiting autoincrement
4297          opportunities, and it turns out it can't be used, set the cost to
4298          infinity to make sure we ignore it.  */
4299       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4300         cost = infinite_cost;
4301     }
4302   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4303                    inv_expr_id);
4304
4305   return !infinite_cost_p (cost);
4306 }
4307
4308 /* Computes value of candidate CAND at position AT in iteration NITER, and
4309    stores it to VAL.  */
4310
4311 static void
4312 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4313                aff_tree *val)
4314 {
4315   aff_tree step, delta, nit;
4316   struct iv *iv = cand->iv;
4317   tree type = TREE_TYPE (iv->base);
4318   tree steptype = type;
4319   if (POINTER_TYPE_P (type))
4320     steptype = sizetype;
4321
4322   tree_to_aff_combination (iv->step, steptype, &step);
4323   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4324   aff_combination_convert (&nit, steptype);
4325   aff_combination_mult (&nit, &step, &delta);
4326   if (stmt_after_increment (loop, cand, at))
4327     aff_combination_add (&delta, &step);
4328
4329   tree_to_aff_combination (iv->base, type, val);
4330   aff_combination_add (val, &delta);
4331 }
4332
4333 /* Returns period of induction variable iv.  */
4334
4335 static tree
4336 iv_period (struct iv *iv)
4337 {
4338   tree step = iv->step, period, type;
4339   tree pow2div;
4340
4341   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4342
4343   type = unsigned_type_for (TREE_TYPE (step));
4344   /* Period of the iv is lcm (step, type_range)/step -1,
4345      i.e., N*type_range/step - 1. Since type range is power
4346      of two, N == (step >> num_of_ending_zeros_binary (step),
4347      so the final result is
4348
4349        (type_range >> num_of_ending_zeros_binary (step)) - 1
4350
4351   */
4352   pow2div = num_ending_zeros (step);
4353
4354   period = build_low_bits_mask (type,
4355                                 (TYPE_PRECISION (type)
4356                                  - tree_low_cst (pow2div, 1)));
4357
4358   return period;
4359 }
4360
4361 /* Returns the comparison operator used when eliminating the iv USE.  */
4362
4363 static enum tree_code
4364 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4365 {
4366   struct loop *loop = data->current_loop;
4367   basic_block ex_bb;
4368   edge exit;
4369
4370   ex_bb = gimple_bb (use->stmt);
4371   exit = EDGE_SUCC (ex_bb, 0);
4372   if (flow_bb_inside_loop_p (loop, exit->dest))
4373     exit = EDGE_SUCC (ex_bb, 1);
4374
4375   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4376 }
4377
4378 static tree
4379 strip_wrap_conserving_type_conversions (tree exp)
4380 {
4381   while (tree_ssa_useless_type_conversion (exp)
4382          && (nowrap_type_p (TREE_TYPE (exp))
4383              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4384     exp = TREE_OPERAND (exp, 0);
4385   return exp;
4386 }
4387
4388 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4389    check for an exact match.  */
4390
4391 static bool
4392 expr_equal_p (tree e, tree what)
4393 {
4394   gimple stmt;
4395   enum tree_code code;
4396
4397   e = strip_wrap_conserving_type_conversions (e);
4398   what = strip_wrap_conserving_type_conversions (what);
4399
4400   code = TREE_CODE (what);
4401   if (TREE_TYPE (e) != TREE_TYPE (what))
4402     return false;
4403
4404   if (operand_equal_p (e, what, 0))
4405     return true;
4406
4407   if (TREE_CODE (e) != SSA_NAME)
4408     return false;
4409
4410   stmt = SSA_NAME_DEF_STMT (e);
4411   if (gimple_code (stmt) != GIMPLE_ASSIGN
4412       || gimple_assign_rhs_code (stmt) != code)
4413     return false;
4414
4415   switch (get_gimple_rhs_class (code))
4416     {
4417     case GIMPLE_BINARY_RHS:
4418       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4419         return false;
4420       /* Fallthru.  */
4421
4422     case GIMPLE_UNARY_RHS:
4423     case GIMPLE_SINGLE_RHS:
4424       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4425     default:
4426       return false;
4427     }
4428 }
4429
4430 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4431    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4432    calculation is performed in non-wrapping type.
4433
4434    TODO: More generally, we could test for the situation that
4435          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4436          This would require knowing the sign of OFFSET.
4437
4438          Also, we only look for the first addition in the computation of BASE.
4439          More complex analysis would be better, but introducing it just for
4440          this optimization seems like an overkill.  */
4441
4442 static bool
4443 difference_cannot_overflow_p (tree base, tree offset)
4444 {
4445   enum tree_code code;
4446   tree e1, e2;
4447
4448   if (!nowrap_type_p (TREE_TYPE (base)))
4449     return false;
4450
4451   base = expand_simple_operations (base);
4452
4453   if (TREE_CODE (base) == SSA_NAME)
4454     {
4455       gimple stmt = SSA_NAME_DEF_STMT (base);
4456
4457       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4458         return false;
4459
4460       code = gimple_assign_rhs_code (stmt);
4461       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4462         return false;
4463
4464       e1 = gimple_assign_rhs1 (stmt);
4465       e2 = gimple_assign_rhs2 (stmt);
4466     }
4467   else
4468     {
4469       code = TREE_CODE (base);
4470       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4471         return false;
4472       e1 = TREE_OPERAND (base, 0);
4473       e2 = TREE_OPERAND (base, 1);
4474     }
4475
4476   /* TODO: deeper inspection may be necessary to prove the equality.  */
4477   switch (code)
4478     {
4479     case PLUS_EXPR:
4480       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4481     case POINTER_PLUS_EXPR:
4482       return expr_equal_p (e2, offset);
4483
4484     default:
4485       return false;
4486     }
4487 }
4488
4489 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4490    comparison with CAND.  NITER describes the number of iterations of
4491    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4492
4493    We aim to handle the following situation:
4494
4495    sometype *base, *p;
4496    int a, b, i;
4497
4498    i = a;
4499    p = p_0 = base + a;
4500
4501    do
4502      {
4503        bla (*p);
4504        p++;
4505        i++;
4506      }
4507    while (i < b);
4508
4509    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4510    We aim to optimize this to
4511
4512    p = p_0 = base + a;
4513    do
4514      {
4515        bla (*p);
4516        p++;
4517      }
4518    while (p < p_0 - a + b);
4519
4520    This preserves the correctness, since the pointer arithmetics does not
4521    overflow.  More precisely:
4522
4523    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4524       overflow in computing it or the values of p.
4525    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4526       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4527
4528 static bool
4529 iv_elimination_compare_lt (struct ivopts_data *data,
4530                            struct iv_cand *cand, enum tree_code *comp_p,
4531                            struct tree_niter_desc *niter)
4532 {
4533   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4534   struct affine_tree_combination nit, tmpa, tmpb;
4535   enum tree_code comp;
4536   HOST_WIDE_INT step;
4537
4538   /* We need to know that the candidate induction variable does not overflow.
4539      While more complex analysis may be used to prove this, for now just
4540      check that the variable appears in the original program and that it
4541      is computed in a type that guarantees no overflows.  */
4542   cand_type = TREE_TYPE (cand->iv->base);
4543   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4544     return false;
4545
4546   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4547      the calculation of the BOUND could overflow, making the comparison
4548      invalid.  */
4549   if (!data->loop_single_exit_p)
4550     return false;
4551
4552   /* We need to be able to decide whether candidate is increasing or decreasing
4553      in order to choose the right comparison operator.  */
4554   if (!cst_and_fits_in_hwi (cand->iv->step))
4555     return false;
4556   step = int_cst_value (cand->iv->step);
4557
4558   /* Check that the number of iterations matches the expected pattern:
4559      a + 1 > b ? 0 : b - a - 1.  */
4560   mbz = niter->may_be_zero;
4561   if (TREE_CODE (mbz) == GT_EXPR)
4562     {
4563       /* Handle a + 1 > b.  */
4564       tree op0 = TREE_OPERAND (mbz, 0);
4565       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4566         {
4567           a = TREE_OPERAND (op0, 0);
4568           b = TREE_OPERAND (mbz, 1);
4569         }
4570       else
4571         return false;
4572     }
4573   else if (TREE_CODE (mbz) == LT_EXPR)
4574     {
4575       tree op1 = TREE_OPERAND (mbz, 1);
4576
4577       /* Handle b < a + 1.  */
4578       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4579         {
4580           a = TREE_OPERAND (op1, 0);
4581           b = TREE_OPERAND (mbz, 0);
4582         }
4583       else
4584         return false;
4585     }
4586   else
4587     return false;
4588
4589   /* Expected number of iterations is B - A - 1.  Check that it matches
4590      the actual number, i.e., that B - A - NITER = 1.  */
4591   tree_to_aff_combination (niter->niter, nit_type, &nit);
4592   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4593   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4594   aff_combination_scale (&nit, double_int_minus_one);
4595   aff_combination_scale (&tmpa, double_int_minus_one);
4596   aff_combination_add (&tmpb, &tmpa);
4597   aff_combination_add (&tmpb, &nit);
4598   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4599     return false;
4600
4601   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4602      overflow.  */
4603   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4604                         cand->iv->step,
4605                         fold_convert (TREE_TYPE (cand->iv->step), a));
4606   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4607     return false;
4608
4609   /* Determine the new comparison operator.  */
4610   comp = step < 0 ? GT_EXPR : LT_EXPR;
4611   if (*comp_p == NE_EXPR)
4612     *comp_p = comp;
4613   else if (*comp_p == EQ_EXPR)
4614     *comp_p = invert_tree_comparison (comp, false);
4615   else
4616     gcc_unreachable ();
4617
4618   return true;
4619 }
4620
4621 /* Check whether it is possible to express the condition in USE by comparison
4622    of candidate CAND.  If so, store the value compared with to BOUND, and the
4623    comparison operator to COMP.  */
4624
4625 static bool
4626 may_eliminate_iv (struct ivopts_data *data,
4627                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4628                   enum tree_code *comp)
4629 {
4630   basic_block ex_bb;
4631   edge exit;
4632   tree period;
4633   struct loop *loop = data->current_loop;
4634   aff_tree bnd;
4635   struct tree_niter_desc *desc = NULL;
4636
4637   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4638     return false;
4639
4640   /* For now works only for exits that dominate the loop latch.
4641      TODO: extend to other conditions inside loop body.  */
4642   ex_bb = gimple_bb (use->stmt);
4643   if (use->stmt != last_stmt (ex_bb)
4644       || gimple_code (use->stmt) != GIMPLE_COND
4645       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4646     return false;
4647
4648   exit = EDGE_SUCC (ex_bb, 0);
4649   if (flow_bb_inside_loop_p (loop, exit->dest))
4650     exit = EDGE_SUCC (ex_bb, 1);
4651   if (flow_bb_inside_loop_p (loop, exit->dest))
4652     return false;
4653
4654   desc = niter_for_exit (data, exit);
4655   if (!desc)
4656     return false;
4657
4658   /* Determine whether we can use the variable to test the exit condition.
4659      This is the case iff the period of the induction variable is greater
4660      than the number of iterations for which the exit condition is true.  */
4661   period = iv_period (cand->iv);
4662
4663   /* If the number of iterations is constant, compare against it directly.  */
4664   if (TREE_CODE (desc->niter) == INTEGER_CST)
4665     {
4666       /* See cand_value_at.  */
4667       if (stmt_after_increment (loop, cand, use->stmt))
4668         {
4669           if (!tree_int_cst_lt (desc->niter, period))
4670             return false;
4671         }
4672       else
4673         {
4674           if (tree_int_cst_lt (period, desc->niter))
4675             return false;
4676         }
4677     }
4678
4679   /* If not, and if this is the only possible exit of the loop, see whether
4680      we can get a conservative estimate on the number of iterations of the
4681      entire loop and compare against that instead.  */
4682   else
4683     {
4684       double_int period_value, max_niter;
4685
4686       max_niter = desc->max;
4687       if (stmt_after_increment (loop, cand, use->stmt))
4688         max_niter += double_int_one;
4689       period_value = tree_to_double_int (period);
4690       if (max_niter.ugt (period_value))
4691         {
4692           /* See if we can take advantage of inferred loop bound information.  */
4693           if (data->loop_single_exit_p)
4694             {
4695               if (!max_loop_iterations (loop, &max_niter))
4696                 return false;
4697               /* The loop bound is already adjusted by adding 1.  */
4698               if (max_niter.ugt (period_value))
4699                 return false;
4700             }
4701           else
4702             return false;
4703         }
4704     }
4705
4706   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4707
4708   *bound = aff_combination_to_tree (&bnd);
4709   *comp = iv_elimination_compare (data, use);
4710
4711   /* It is unlikely that computing the number of iterations using division
4712      would be more profitable than keeping the original induction variable.  */
4713   if (expression_expensive_p (*bound))
4714     return false;
4715
4716   /* Sometimes, it is possible to handle the situation that the number of
4717      iterations may be zero unless additional assumtions by using <
4718      instead of != in the exit condition.
4719
4720      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4721            base the exit condition on it.  However, that is often too
4722            expensive.  */
4723   if (!integer_zerop (desc->may_be_zero))
4724     return iv_elimination_compare_lt (data, cand, comp, desc);
4725
4726   return true;
4727 }
4728
4729  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4730     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4731
4732 static int
4733 parm_decl_cost (struct ivopts_data *data, tree bound)
4734 {
4735   tree sbound = bound;
4736   STRIP_NOPS (sbound);
4737
4738   if (TREE_CODE (sbound) == SSA_NAME
4739       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4740       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4741       && data->body_includes_call)
4742     return COSTS_N_INSNS (1);
4743
4744   return 0;
4745 }
4746
4747 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4748
4749 static bool
4750 determine_use_iv_cost_condition (struct ivopts_data *data,
4751                                  struct iv_use *use, struct iv_cand *cand)
4752 {
4753   tree bound = NULL_TREE;
4754   struct iv *cmp_iv;
4755   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4756   comp_cost elim_cost, express_cost, cost, bound_cost;
4757   bool ok;
4758   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4759   tree *control_var, *bound_cst;
4760   enum tree_code comp = ERROR_MARK;
4761
4762   /* Only consider real candidates.  */
4763   if (!cand->iv)
4764     {
4765       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4766                        ERROR_MARK, -1);
4767       return false;
4768     }
4769
4770   /* Try iv elimination.  */
4771   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4772     {
4773       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4774       if (elim_cost.cost == 0)
4775         elim_cost.cost = parm_decl_cost (data, bound);
4776       else if (TREE_CODE (bound) == INTEGER_CST)
4777         elim_cost.cost = 0;
4778       /* If we replace a loop condition 'i < n' with 'p < base + n',
4779          depends_on_elim will have 'base' and 'n' set, which implies
4780          that both 'base' and 'n' will be live during the loop.  More likely,
4781          'base + n' will be loop invariant, resulting in only one live value
4782          during the loop.  So in that case we clear depends_on_elim and set
4783         elim_inv_expr_id instead.  */
4784       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4785         {
4786           elim_inv_expr_id = get_expr_id (data, bound);
4787           bitmap_clear (depends_on_elim);
4788         }
4789       /* The bound is a loop invariant, so it will be only computed
4790          once.  */
4791       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4792     }
4793   else
4794     elim_cost = infinite_cost;
4795
4796   /* Try expressing the original giv.  If it is compared with an invariant,
4797      note that we cannot get rid of it.  */
4798   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4799                               NULL, &cmp_iv);
4800   gcc_assert (ok);
4801
4802   /* When the condition is a comparison of the candidate IV against
4803      zero, prefer this IV.
4804
4805      TODO: The constant that we're subtracting from the cost should
4806      be target-dependent.  This information should be added to the
4807      target costs for each backend.  */
4808   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4809       && integer_zerop (*bound_cst)
4810       && (operand_equal_p (*control_var, cand->var_after, 0)
4811           || operand_equal_p (*control_var, cand->var_before, 0)))
4812     elim_cost.cost -= 1;
4813
4814   express_cost = get_computation_cost (data, use, cand, false,
4815                                        &depends_on_express, NULL,
4816                                        &express_inv_expr_id);
4817   fd_ivopts_data = data;
4818   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4819
4820   /* Count the cost of the original bound as well.  */
4821   bound_cost = force_var_cost (data, *bound_cst, NULL);
4822   if (bound_cost.cost == 0)
4823     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4824   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4825     bound_cost.cost = 0;
4826   express_cost.cost += bound_cost.cost;
4827
4828   /* Choose the better approach, preferring the eliminated IV. */
4829   if (compare_costs (elim_cost, express_cost) <= 0)
4830     {
4831       cost = elim_cost;
4832       depends_on = depends_on_elim;
4833       depends_on_elim = NULL;
4834       inv_expr_id = elim_inv_expr_id;
4835     }
4836   else
4837     {
4838       cost = express_cost;
4839       depends_on = depends_on_express;
4840       depends_on_express = NULL;
4841       bound = NULL_TREE;
4842       comp = ERROR_MARK;
4843       inv_expr_id = express_inv_expr_id;
4844     }
4845
4846   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4847
4848   if (depends_on_elim)
4849     BITMAP_FREE (depends_on_elim);
4850   if (depends_on_express)
4851     BITMAP_FREE (depends_on_express);
4852
4853   return !infinite_cost_p (cost);
4854 }
4855
4856 /* Determines cost of basing replacement of USE on CAND.  Returns false
4857    if USE cannot be based on CAND.  */
4858
4859 static bool
4860 determine_use_iv_cost (struct ivopts_data *data,
4861                        struct iv_use *use, struct iv_cand *cand)
4862 {
4863   switch (use->type)
4864     {
4865     case USE_NONLINEAR_EXPR:
4866       return determine_use_iv_cost_generic (data, use, cand);
4867
4868     case USE_ADDRESS:
4869       return determine_use_iv_cost_address (data, use, cand);
4870
4871     case USE_COMPARE:
4872       return determine_use_iv_cost_condition (data, use, cand);
4873
4874     default:
4875       gcc_unreachable ();
4876     }
4877 }
4878
4879 /* Return true if get_computation_cost indicates that autoincrement is
4880    a possibility for the pair of USE and CAND, false otherwise.  */
4881
4882 static bool
4883 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4884                            struct iv_cand *cand)
4885 {
4886   bitmap depends_on;
4887   bool can_autoinc;
4888   comp_cost cost;
4889
4890   if (use->type != USE_ADDRESS)
4891     return false;
4892
4893   cost = get_computation_cost (data, use, cand, true, &depends_on,
4894                                &can_autoinc, NULL);
4895
4896   BITMAP_FREE (depends_on);
4897
4898   return !infinite_cost_p (cost) && can_autoinc;
4899 }
4900
4901 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4902    use that allows autoincrement, and set their AINC_USE if possible.  */
4903
4904 static void
4905 set_autoinc_for_original_candidates (struct ivopts_data *data)
4906 {
4907   unsigned i, j;
4908
4909   for (i = 0; i < n_iv_cands (data); i++)
4910     {
4911       struct iv_cand *cand = iv_cand (data, i);
4912       struct iv_use *closest_before = NULL;
4913       struct iv_use *closest_after = NULL;
4914       if (cand->pos != IP_ORIGINAL)
4915         continue;
4916
4917       for (j = 0; j < n_iv_uses (data); j++)
4918         {
4919           struct iv_use *use = iv_use (data, j);
4920           unsigned uid = gimple_uid (use->stmt);
4921
4922           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4923             continue;
4924
4925           if (uid < gimple_uid (cand->incremented_at)
4926               && (closest_before == NULL
4927                   || uid > gimple_uid (closest_before->stmt)))
4928             closest_before = use;
4929
4930           if (uid > gimple_uid (cand->incremented_at)
4931               && (closest_after == NULL
4932                   || uid < gimple_uid (closest_after->stmt)))
4933             closest_after = use;
4934         }
4935
4936       if (closest_before != NULL
4937           && autoinc_possible_for_pair (data, closest_before, cand))
4938         cand->ainc_use = closest_before;
4939       else if (closest_after != NULL
4940                && autoinc_possible_for_pair (data, closest_after, cand))
4941         cand->ainc_use = closest_after;
4942     }
4943 }
4944
4945 /* Finds the candidates for the induction variables.  */
4946
4947 static void
4948 find_iv_candidates (struct ivopts_data *data)
4949 {
4950   /* Add commonly used ivs.  */
4951   add_standard_iv_candidates (data);
4952
4953   /* Add old induction variables.  */
4954   add_old_ivs_candidates (data);
4955
4956   /* Add induction variables derived from uses.  */
4957   add_derived_ivs_candidates (data);
4958
4959   set_autoinc_for_original_candidates (data);
4960
4961   /* Record the important candidates.  */
4962   record_important_candidates (data);
4963 }
4964
4965 /* Determines costs of basing the use of the iv on an iv candidate.  */
4966
4967 static void
4968 determine_use_iv_costs (struct ivopts_data *data)
4969 {
4970   unsigned i, j;
4971   struct iv_use *use;
4972   struct iv_cand *cand;
4973   bitmap to_clear = BITMAP_ALLOC (NULL);
4974
4975   alloc_use_cost_map (data);
4976
4977   for (i = 0; i < n_iv_uses (data); i++)
4978     {
4979       use = iv_use (data, i);
4980
4981       if (data->consider_all_candidates)
4982         {
4983           for (j = 0; j < n_iv_cands (data); j++)
4984             {
4985               cand = iv_cand (data, j);
4986               determine_use_iv_cost (data, use, cand);
4987             }
4988         }
4989       else
4990         {
4991           bitmap_iterator bi;
4992
4993           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4994             {
4995               cand = iv_cand (data, j);
4996               if (!determine_use_iv_cost (data, use, cand))
4997                 bitmap_set_bit (to_clear, j);
4998             }
4999
5000           /* Remove the candidates for that the cost is infinite from
5001              the list of related candidates.  */
5002           bitmap_and_compl_into (use->related_cands, to_clear);
5003           bitmap_clear (to_clear);
5004         }
5005     }
5006
5007   BITMAP_FREE (to_clear);
5008
5009   if (dump_file && (dump_flags & TDF_DETAILS))
5010     {
5011       fprintf (dump_file, "Use-candidate costs:\n");
5012
5013       for (i = 0; i < n_iv_uses (data); i++)
5014         {
5015           use = iv_use (data, i);
5016
5017           fprintf (dump_file, "Use %d:\n", i);
5018           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5019           for (j = 0; j < use->n_map_members; j++)
5020             {
5021               if (!use->cost_map[j].cand
5022                   || infinite_cost_p (use->cost_map[j].cost))
5023                 continue;
5024
5025               fprintf (dump_file, "  %d\t%d\t%d\t",
5026                        use->cost_map[j].cand->id,
5027                        use->cost_map[j].cost.cost,
5028                        use->cost_map[j].cost.complexity);
5029               if (use->cost_map[j].depends_on)
5030                 bitmap_print (dump_file,
5031                               use->cost_map[j].depends_on, "","");
5032               if (use->cost_map[j].inv_expr_id != -1)
5033                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5034               fprintf (dump_file, "\n");
5035             }
5036
5037           fprintf (dump_file, "\n");
5038         }
5039       fprintf (dump_file, "\n");
5040     }
5041 }
5042
5043 /* Determines cost of the candidate CAND.  */
5044
5045 static void
5046 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5047 {
5048   comp_cost cost_base;
5049   unsigned cost, cost_step;
5050   tree base;
5051
5052   if (!cand->iv)
5053     {
5054       cand->cost = 0;
5055       return;
5056     }
5057
5058   /* There are two costs associated with the candidate -- its increment
5059      and its initialization.  The second is almost negligible for any loop
5060      that rolls enough, so we take it just very little into account.  */
5061
5062   base = cand->iv->base;
5063   cost_base = force_var_cost (data, base, NULL);
5064   /* It will be exceptional that the iv register happens to be initialized with
5065      the proper value at no cost.  In general, there will at least be a regcopy
5066      or a const set.  */
5067   if (cost_base.cost == 0)
5068     cost_base.cost = COSTS_N_INSNS (1);
5069   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5070
5071   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5072
5073   /* Prefer the original ivs unless we may gain something by replacing it.
5074      The reason is to make debugging simpler; so this is not relevant for
5075      artificial ivs created by other optimization passes.  */
5076   if (cand->pos != IP_ORIGINAL
5077       || !SSA_NAME_VAR (cand->var_before)
5078       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5079     cost++;
5080
5081   /* Prefer not to insert statements into latch unless there are some
5082      already (so that we do not create unnecessary jumps).  */
5083   if (cand->pos == IP_END
5084       && empty_block_p (ip_end_pos (data->current_loop)))
5085     cost++;
5086
5087   cand->cost = cost;
5088   cand->cost_step = cost_step;
5089 }
5090
5091 /* Determines costs of computation of the candidates.  */
5092
5093 static void
5094 determine_iv_costs (struct ivopts_data *data)
5095 {
5096   unsigned i;
5097
5098   if (dump_file && (dump_flags & TDF_DETAILS))
5099     {
5100       fprintf (dump_file, "Candidate costs:\n");
5101       fprintf (dump_file, "  cand\tcost\n");
5102     }
5103
5104   for (i = 0; i < n_iv_cands (data); i++)
5105     {
5106       struct iv_cand *cand = iv_cand (data, i);
5107
5108       determine_iv_cost (data, cand);
5109
5110       if (dump_file && (dump_flags & TDF_DETAILS))
5111         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5112     }
5113
5114   if (dump_file && (dump_flags & TDF_DETAILS))
5115     fprintf (dump_file, "\n");
5116 }
5117
5118 /* Calculates cost for having SIZE induction variables.  */
5119
5120 static unsigned
5121 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5122 {
5123   /* We add size to the cost, so that we prefer eliminating ivs
5124      if possible.  */
5125   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5126                                             data->body_includes_call);
5127 }
5128
5129 /* For each size of the induction variable set determine the penalty.  */
5130
5131 static void
5132 determine_set_costs (struct ivopts_data *data)
5133 {
5134   unsigned j, n;
5135   gimple phi;
5136   gimple_stmt_iterator psi;
5137   tree op;
5138   struct loop *loop = data->current_loop;
5139   bitmap_iterator bi;
5140
5141   if (dump_file && (dump_flags & TDF_DETAILS))
5142     {
5143       fprintf (dump_file, "Global costs:\n");
5144       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5145       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5146       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5147       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5148     }
5149
5150   n = 0;
5151   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5152     {
5153       phi = gsi_stmt (psi);
5154       op = PHI_RESULT (phi);
5155
5156       if (virtual_operand_p (op))
5157         continue;
5158
5159       if (get_iv (data, op))
5160         continue;
5161
5162       n++;
5163     }
5164
5165   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5166     {
5167       struct version_info *info = ver_info (data, j);
5168
5169       if (info->inv_id && info->has_nonlin_use)
5170         n++;
5171     }
5172
5173   data->regs_used = n;
5174   if (dump_file && (dump_flags & TDF_DETAILS))
5175     fprintf (dump_file, "  regs_used %d\n", n);
5176
5177   if (dump_file && (dump_flags & TDF_DETAILS))
5178     {
5179       fprintf (dump_file, "  cost for size:\n");
5180       fprintf (dump_file, "  ivs\tcost\n");
5181       for (j = 0; j <= 2 * target_avail_regs; j++)
5182         fprintf (dump_file, "  %d\t%d\n", j,
5183                  ivopts_global_cost_for_size (data, j));
5184       fprintf (dump_file, "\n");
5185     }
5186 }
5187
5188 /* Returns true if A is a cheaper cost pair than B.  */
5189
5190 static bool
5191 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5192 {
5193   int cmp;
5194
5195   if (!a)
5196     return false;
5197
5198   if (!b)
5199     return true;
5200
5201   cmp = compare_costs (a->cost, b->cost);
5202   if (cmp < 0)
5203     return true;
5204
5205   if (cmp > 0)
5206     return false;
5207
5208   /* In case the costs are the same, prefer the cheaper candidate.  */
5209   if (a->cand->cost < b->cand->cost)
5210     return true;
5211
5212   return false;
5213 }
5214
5215
5216 /* Returns candidate by that USE is expressed in IVS.  */
5217
5218 static struct cost_pair *
5219 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5220 {
5221   return ivs->cand_for_use[use->id];
5222 }
5223
5224 /* Computes the cost field of IVS structure.  */
5225
5226 static void
5227 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5228 {
5229   comp_cost cost = ivs->cand_use_cost;
5230
5231   cost.cost += ivs->cand_cost;
5232
5233   cost.cost += ivopts_global_cost_for_size (data,
5234                                             ivs->n_regs + ivs->num_used_inv_expr);
5235
5236   ivs->cost = cost;
5237 }
5238
5239 /* Remove invariants in set INVS to set IVS.  */
5240
5241 static void
5242 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5243 {
5244   bitmap_iterator bi;
5245   unsigned iid;
5246
5247   if (!invs)
5248     return;
5249
5250   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5251     {
5252       ivs->n_invariant_uses[iid]--;
5253       if (ivs->n_invariant_uses[iid] == 0)
5254         ivs->n_regs--;
5255     }
5256 }
5257
5258 /* Set USE not to be expressed by any candidate in IVS.  */
5259
5260 static void
5261 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5262                  struct iv_use *use)
5263 {
5264   unsigned uid = use->id, cid;
5265   struct cost_pair *cp;
5266
5267   cp = ivs->cand_for_use[uid];
5268   if (!cp)
5269     return;
5270   cid = cp->cand->id;
5271
5272   ivs->bad_uses++;
5273   ivs->cand_for_use[uid] = NULL;
5274   ivs->n_cand_uses[cid]--;
5275
5276   if (ivs->n_cand_uses[cid] == 0)
5277     {
5278       bitmap_clear_bit (ivs->cands, cid);
5279       /* Do not count the pseudocandidates.  */
5280       if (cp->cand->iv)
5281         ivs->n_regs--;
5282       ivs->n_cands--;
5283       ivs->cand_cost -= cp->cand->cost;
5284
5285       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5286     }
5287
5288   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5289
5290   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5291
5292   if (cp->inv_expr_id != -1)
5293     {
5294       ivs->used_inv_expr[cp->inv_expr_id]--;
5295       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5296         ivs->num_used_inv_expr--;
5297     }
5298   iv_ca_recount_cost (data, ivs);
5299 }
5300
5301 /* Add invariants in set INVS to set IVS.  */
5302
5303 static void
5304 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5305 {
5306   bitmap_iterator bi;
5307   unsigned iid;
5308
5309   if (!invs)
5310     return;
5311
5312   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5313     {
5314       ivs->n_invariant_uses[iid]++;
5315       if (ivs->n_invariant_uses[iid] == 1)
5316         ivs->n_regs++;
5317     }
5318 }
5319
5320 /* Set cost pair for USE in set IVS to CP.  */
5321
5322 static void
5323 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5324               struct iv_use *use, struct cost_pair *cp)
5325 {
5326   unsigned uid = use->id, cid;
5327
5328   if (ivs->cand_for_use[uid] == cp)
5329     return;
5330
5331   if (ivs->cand_for_use[uid])
5332     iv_ca_set_no_cp (data, ivs, use);
5333
5334   if (cp)
5335     {
5336       cid = cp->cand->id;
5337
5338       ivs->bad_uses--;
5339       ivs->cand_for_use[uid] = cp;
5340       ivs->n_cand_uses[cid]++;
5341       if (ivs->n_cand_uses[cid] == 1)
5342         {
5343           bitmap_set_bit (ivs->cands, cid);
5344           /* Do not count the pseudocandidates.  */
5345           if (cp->cand->iv)
5346             ivs->n_regs++;
5347           ivs->n_cands++;
5348           ivs->cand_cost += cp->cand->cost;
5349
5350           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5351         }
5352
5353       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5354       iv_ca_set_add_invariants (ivs, cp->depends_on);
5355
5356       if (cp->inv_expr_id != -1)
5357         {
5358           ivs->used_inv_expr[cp->inv_expr_id]++;
5359           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5360             ivs->num_used_inv_expr++;
5361         }
5362       iv_ca_recount_cost (data, ivs);
5363     }
5364 }
5365
5366 /* Extend set IVS by expressing USE by some of the candidates in it
5367    if possible. All important candidates will be considered
5368    if IMPORTANT_CANDIDATES is true.  */
5369
5370 static void
5371 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5372                struct iv_use *use, bool important_candidates)
5373 {
5374   struct cost_pair *best_cp = NULL, *cp;
5375   bitmap_iterator bi;
5376   bitmap cands;
5377   unsigned i;
5378
5379   gcc_assert (ivs->upto >= use->id);
5380
5381   if (ivs->upto == use->id)
5382     {
5383       ivs->upto++;
5384       ivs->bad_uses++;
5385     }
5386
5387   cands = (important_candidates ? data->important_candidates : ivs->cands);
5388   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5389     {
5390       struct iv_cand *cand = iv_cand (data, i);
5391
5392       cp = get_use_iv_cost (data, use, cand);
5393
5394       if (cheaper_cost_pair (cp, best_cp))
5395         best_cp = cp;
5396     }
5397
5398   iv_ca_set_cp (data, ivs, use, best_cp);
5399 }
5400
5401 /* Get cost for assignment IVS.  */
5402
5403 static comp_cost
5404 iv_ca_cost (struct iv_ca *ivs)
5405 {
5406   /* This was a conditional expression but it triggered a bug in
5407      Sun C 5.5.  */
5408   if (ivs->bad_uses)
5409     return infinite_cost;
5410   else
5411     return ivs->cost;
5412 }
5413
5414 /* Returns true if all dependences of CP are among invariants in IVS.  */
5415
5416 static bool
5417 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5418 {
5419   unsigned i;
5420   bitmap_iterator bi;
5421
5422   if (!cp->depends_on)
5423     return true;
5424
5425   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5426     {
5427       if (ivs->n_invariant_uses[i] == 0)
5428         return false;
5429     }
5430
5431   return true;
5432 }
5433
5434 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5435    it before NEXT_CHANGE.  */
5436
5437 static struct iv_ca_delta *
5438 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5439                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5440 {
5441   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5442
5443   change->use = use;
5444   change->old_cp = old_cp;
5445   change->new_cp = new_cp;
5446   change->next_change = next_change;
5447
5448   return change;
5449 }
5450
5451 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5452    are rewritten.  */
5453
5454 static struct iv_ca_delta *
5455 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5456 {
5457   struct iv_ca_delta *last;
5458
5459   if (!l2)
5460     return l1;
5461
5462   if (!l1)
5463     return l2;
5464
5465   for (last = l1; last->next_change; last = last->next_change)
5466     continue;
5467   last->next_change = l2;
5468
5469   return l1;
5470 }
5471
5472 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5473
5474 static struct iv_ca_delta *
5475 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5476 {
5477   struct iv_ca_delta *act, *next, *prev = NULL;
5478   struct cost_pair *tmp;
5479
5480   for (act = delta; act; act = next)
5481     {
5482       next = act->next_change;
5483       act->next_change = prev;
5484       prev = act;
5485
5486       tmp = act->old_cp;
5487       act->old_cp = act->new_cp;
5488       act->new_cp = tmp;
5489     }
5490
5491   return prev;
5492 }
5493
5494 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5495    reverted instead.  */
5496
5497 static void
5498 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5499                     struct iv_ca_delta *delta, bool forward)
5500 {
5501   struct cost_pair *from, *to;
5502   struct iv_ca_delta *act;
5503
5504   if (!forward)
5505     delta = iv_ca_delta_reverse (delta);
5506
5507   for (act = delta; act; act = act->next_change)
5508     {
5509       from = act->old_cp;
5510       to = act->new_cp;
5511       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5512       iv_ca_set_cp (data, ivs, act->use, to);
5513     }
5514
5515   if (!forward)
5516     iv_ca_delta_reverse (delta);
5517 }
5518
5519 /* Returns true if CAND is used in IVS.  */
5520
5521 static bool
5522 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5523 {
5524   return ivs->n_cand_uses[cand->id] > 0;
5525 }
5526
5527 /* Returns number of induction variable candidates in the set IVS.  */
5528
5529 static unsigned
5530 iv_ca_n_cands (struct iv_ca *ivs)
5531 {
5532   return ivs->n_cands;
5533 }
5534
5535 /* Free the list of changes DELTA.  */
5536
5537 static void
5538 iv_ca_delta_free (struct iv_ca_delta **delta)
5539 {
5540   struct iv_ca_delta *act, *next;
5541
5542   for (act = *delta; act; act = next)
5543     {
5544       next = act->next_change;
5545       free (act);
5546     }
5547
5548   *delta = NULL;
5549 }
5550
5551 /* Allocates new iv candidates assignment.  */
5552
5553 static struct iv_ca *
5554 iv_ca_new (struct ivopts_data *data)
5555 {
5556   struct iv_ca *nw = XNEW (struct iv_ca);
5557
5558   nw->upto = 0;
5559   nw->bad_uses = 0;
5560   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5561   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5562   nw->cands = BITMAP_ALLOC (NULL);
5563   nw->n_cands = 0;
5564   nw->n_regs = 0;
5565   nw->cand_use_cost = no_cost;
5566   nw->cand_cost = 0;
5567   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5568   nw->cost = no_cost;
5569   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5570   nw->num_used_inv_expr = 0;
5571
5572   return nw;
5573 }
5574
5575 /* Free memory occupied by the set IVS.  */
5576
5577 static void
5578 iv_ca_free (struct iv_ca **ivs)
5579 {
5580   free ((*ivs)->cand_for_use);
5581   free ((*ivs)->n_cand_uses);
5582   BITMAP_FREE ((*ivs)->cands);
5583   free ((*ivs)->n_invariant_uses);
5584   free ((*ivs)->used_inv_expr);
5585   free (*ivs);
5586   *ivs = NULL;
5587 }
5588
5589 /* Dumps IVS to FILE.  */
5590
5591 static void
5592 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5593 {
5594   const char *pref = "  invariants ";
5595   unsigned i;
5596   comp_cost cost = iv_ca_cost (ivs);
5597
5598   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5599   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5600            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5601   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5602
5603    for (i = 0; i < ivs->upto; i++)
5604     {
5605       struct iv_use *use = iv_use (data, i);
5606       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5607       if (cp)
5608         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5609                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5610       else
5611         fprintf (file, "   use:%d --> ??\n", use->id);
5612     }
5613
5614   for (i = 1; i <= data->max_inv_id; i++)
5615     if (ivs->n_invariant_uses[i])
5616       {
5617         fprintf (file, "%s%d", pref, i);
5618         pref = ", ";
5619       }
5620   fprintf (file, "\n\n");
5621 }
5622
5623 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5624    new set, and store differences in DELTA.  Number of induction variables
5625    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5626    the function will try to find a solution with mimimal iv candidates.  */
5627
5628 static comp_cost
5629 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5630               struct iv_cand *cand, struct iv_ca_delta **delta,
5631               unsigned *n_ivs, bool min_ncand)
5632 {
5633   unsigned i;
5634   comp_cost cost;
5635   struct iv_use *use;
5636   struct cost_pair *old_cp, *new_cp;
5637
5638   *delta = NULL;
5639   for (i = 0; i < ivs->upto; i++)
5640     {
5641       use = iv_use (data, i);
5642       old_cp = iv_ca_cand_for_use (ivs, use);
5643
5644       if (old_cp
5645           && old_cp->cand == cand)
5646         continue;
5647
5648       new_cp = get_use_iv_cost (data, use, cand);
5649       if (!new_cp)
5650         continue;
5651
5652       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5653         continue;
5654
5655       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5656         continue;
5657
5658       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5659     }
5660
5661   iv_ca_delta_commit (data, ivs, *delta, true);
5662   cost = iv_ca_cost (ivs);
5663   if (n_ivs)
5664     *n_ivs = iv_ca_n_cands (ivs);
5665   iv_ca_delta_commit (data, ivs, *delta, false);
5666
5667   return cost;
5668 }
5669
5670 /* Try narrowing set IVS by removing CAND.  Return the cost of
5671    the new set and store the differences in DELTA.  */
5672
5673 static comp_cost
5674 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5675               struct iv_cand *cand, struct iv_ca_delta **delta)
5676 {
5677   unsigned i, ci;
5678   struct iv_use *use;
5679   struct cost_pair *old_cp, *new_cp, *cp;
5680   bitmap_iterator bi;
5681   struct iv_cand *cnd;
5682   comp_cost cost;
5683
5684   *delta = NULL;
5685   for (i = 0; i < n_iv_uses (data); i++)
5686     {
5687       use = iv_use (data, i);
5688
5689       old_cp = iv_ca_cand_for_use (ivs, use);
5690       if (old_cp->cand != cand)
5691         continue;
5692
5693       new_cp = NULL;
5694
5695       if (data->consider_all_candidates)
5696         {
5697           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5698             {
5699               if (ci == cand->id)
5700                 continue;
5701
5702               cnd = iv_cand (data, ci);
5703
5704               cp = get_use_iv_cost (data, use, cnd);
5705               if (!cp)
5706                 continue;
5707
5708               if (!iv_ca_has_deps (ivs, cp))
5709                 continue;
5710
5711               if (!cheaper_cost_pair (cp, new_cp))
5712                 continue;
5713
5714               new_cp = cp;
5715             }
5716         }
5717       else
5718         {
5719           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5720             {
5721               if (ci == cand->id)
5722                 continue;
5723
5724               cnd = iv_cand (data, ci);
5725
5726               cp = get_use_iv_cost (data, use, cnd);
5727               if (!cp)
5728                 continue;
5729               if (!iv_ca_has_deps (ivs, cp))
5730                 continue;
5731
5732               if (!cheaper_cost_pair (cp, new_cp))
5733                 continue;
5734
5735               new_cp = cp;
5736             }
5737         }
5738
5739       if (!new_cp)
5740         {
5741           iv_ca_delta_free (delta);
5742           return infinite_cost;
5743         }
5744
5745       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5746     }
5747
5748   iv_ca_delta_commit (data, ivs, *delta, true);
5749   cost = iv_ca_cost (ivs);
5750   iv_ca_delta_commit (data, ivs, *delta, false);
5751
5752   return cost;
5753 }
5754
5755 /* Try optimizing the set of candidates IVS by removing candidates different
5756    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5757    differences in DELTA.  */
5758
5759 static comp_cost
5760 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5761              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5762 {
5763   bitmap_iterator bi;
5764   struct iv_ca_delta *act_delta, *best_delta;
5765   unsigned i;
5766   comp_cost best_cost, acost;
5767   struct iv_cand *cand;
5768
5769   best_delta = NULL;
5770   best_cost = iv_ca_cost (ivs);
5771
5772   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5773     {
5774       cand = iv_cand (data, i);
5775
5776       if (cand == except_cand)
5777         continue;
5778
5779       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5780
5781       if (compare_costs (acost, best_cost) < 0)
5782         {
5783           best_cost = acost;
5784           iv_ca_delta_free (&best_delta);
5785           best_delta = act_delta;
5786         }
5787       else
5788         iv_ca_delta_free (&act_delta);
5789     }
5790
5791   if (!best_delta)
5792     {
5793       *delta = NULL;
5794       return best_cost;
5795     }
5796
5797   /* Recurse to possibly remove other unnecessary ivs.  */
5798   iv_ca_delta_commit (data, ivs, best_delta, true);
5799   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5800   iv_ca_delta_commit (data, ivs, best_delta, false);
5801   *delta = iv_ca_delta_join (best_delta, *delta);
5802   return best_cost;
5803 }
5804
5805 /* Tries to extend the sets IVS in the best possible way in order
5806    to express the USE.  If ORIGINALP is true, prefer candidates from
5807    the original set of IVs, otherwise favor important candidates not
5808    based on any memory object.  */
5809
5810 static bool
5811 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5812                   struct iv_use *use, bool originalp)
5813 {
5814   comp_cost best_cost, act_cost;
5815   unsigned i;
5816   bitmap_iterator bi;
5817   struct iv_cand *cand;
5818   struct iv_ca_delta *best_delta = NULL, *act_delta;
5819   struct cost_pair *cp;
5820
5821   iv_ca_add_use (data, ivs, use, false);
5822   best_cost = iv_ca_cost (ivs);
5823
5824   cp = iv_ca_cand_for_use (ivs, use);
5825   if (!cp)
5826     {
5827       ivs->upto--;
5828       ivs->bad_uses--;
5829       iv_ca_add_use (data, ivs, use, true);
5830       best_cost = iv_ca_cost (ivs);
5831       cp = iv_ca_cand_for_use (ivs, use);
5832     }
5833   if (cp)
5834     {
5835       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5836       iv_ca_set_no_cp (data, ivs, use);
5837     }
5838
5839   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5840      first try important candidates not based on any memory object.  Only if
5841      this fails, try the specific ones.  Rationale -- in loops with many
5842      variables the best choice often is to use just one generic biv.  If we
5843      added here many ivs specific to the uses, the optimization algorithm later
5844      would be likely to get stuck in a local minimum, thus causing us to create
5845      too many ivs.  The approach from few ivs to more seems more likely to be
5846      successful -- starting from few ivs, replacing an expensive use by a
5847      specific iv should always be a win.  */
5848   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5849     {
5850       cand = iv_cand (data, i);
5851
5852       if (originalp && cand->pos !=IP_ORIGINAL)
5853         continue;
5854
5855       if (!originalp && cand->iv->base_object != NULL_TREE)
5856         continue;
5857
5858       if (iv_ca_cand_used_p (ivs, cand))
5859         continue;
5860
5861       cp = get_use_iv_cost (data, use, cand);
5862       if (!cp)
5863         continue;
5864
5865       iv_ca_set_cp (data, ivs, use, cp);
5866       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5867                                true);
5868       iv_ca_set_no_cp (data, ivs, use);
5869       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5870
5871       if (compare_costs (act_cost, best_cost) < 0)
5872         {
5873           best_cost = act_cost;
5874
5875           iv_ca_delta_free (&best_delta);
5876           best_delta = act_delta;
5877         }
5878       else
5879         iv_ca_delta_free (&act_delta);
5880     }
5881
5882   if (infinite_cost_p (best_cost))
5883     {
5884       for (i = 0; i < use->n_map_members; i++)
5885         {
5886           cp = use->cost_map + i;
5887           cand = cp->cand;
5888           if (!cand)
5889             continue;
5890
5891           /* Already tried this.  */
5892           if (cand->important)
5893             {
5894               if (originalp && cand->pos == IP_ORIGINAL)
5895                 continue;
5896               if (!originalp && cand->iv->base_object == NULL_TREE)
5897                 continue;
5898             }
5899
5900           if (iv_ca_cand_used_p (ivs, cand))
5901             continue;
5902
5903           act_delta = NULL;
5904           iv_ca_set_cp (data, ivs, use, cp);
5905           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5906           iv_ca_set_no_cp (data, ivs, use);
5907           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5908                                        cp, act_delta);
5909
5910           if (compare_costs (act_cost, best_cost) < 0)
5911             {
5912               best_cost = act_cost;
5913
5914               if (best_delta)
5915                 iv_ca_delta_free (&best_delta);
5916               best_delta = act_delta;
5917             }
5918           else
5919             iv_ca_delta_free (&act_delta);
5920         }
5921     }
5922
5923   iv_ca_delta_commit (data, ivs, best_delta, true);
5924   iv_ca_delta_free (&best_delta);
5925
5926   return !infinite_cost_p (best_cost);
5927 }
5928
5929 /* Finds an initial assignment of candidates to uses.  */
5930
5931 static struct iv_ca *
5932 get_initial_solution (struct ivopts_data *data, bool originalp)
5933 {
5934   struct iv_ca *ivs = iv_ca_new (data);
5935   unsigned i;
5936
5937   for (i = 0; i < n_iv_uses (data); i++)
5938     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5939       {
5940         iv_ca_free (&ivs);
5941         return NULL;
5942       }
5943
5944   return ivs;
5945 }
5946
5947 /* Tries to improve set of induction variables IVS.  */
5948
5949 static bool
5950 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5951 {
5952   unsigned i, n_ivs;
5953   comp_cost acost, best_cost = iv_ca_cost (ivs);
5954   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5955   struct iv_cand *cand;
5956
5957   /* Try extending the set of induction variables by one.  */
5958   for (i = 0; i < n_iv_cands (data); i++)
5959     {
5960       cand = iv_cand (data, i);
5961
5962       if (iv_ca_cand_used_p (ivs, cand))
5963         continue;
5964
5965       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5966       if (!act_delta)
5967         continue;
5968
5969       /* If we successfully added the candidate and the set is small enough,
5970          try optimizing it by removing other candidates.  */
5971       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5972         {
5973           iv_ca_delta_commit (data, ivs, act_delta, true);
5974           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5975           iv_ca_delta_commit (data, ivs, act_delta, false);
5976           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5977         }
5978
5979       if (compare_costs (acost, best_cost) < 0)
5980         {
5981           best_cost = acost;
5982           iv_ca_delta_free (&best_delta);
5983           best_delta = act_delta;
5984         }
5985       else
5986         iv_ca_delta_free (&act_delta);
5987     }
5988
5989   if (!best_delta)
5990     {
5991       /* Try removing the candidates from the set instead.  */
5992       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5993
5994       /* Nothing more we can do.  */
5995       if (!best_delta)
5996         return false;
5997     }
5998
5999   iv_ca_delta_commit (data, ivs, best_delta, true);
6000   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6001   iv_ca_delta_free (&best_delta);
6002   return true;
6003 }
6004
6005 /* Attempts to find the optimal set of induction variables.  We do simple
6006    greedy heuristic -- we try to replace at most one candidate in the selected
6007    solution and remove the unused ivs while this improves the cost.  */
6008
6009 static struct iv_ca *
6010 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6011 {
6012   struct iv_ca *set;
6013
6014   /* Get the initial solution.  */
6015   set = get_initial_solution (data, originalp);
6016   if (!set)
6017     {
6018       if (dump_file && (dump_flags & TDF_DETAILS))
6019         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6020       return NULL;
6021     }
6022
6023   if (dump_file && (dump_flags & TDF_DETAILS))
6024     {
6025       fprintf (dump_file, "Initial set of candidates:\n");
6026       iv_ca_dump (data, dump_file, set);
6027     }
6028
6029   while (try_improve_iv_set (data, set))
6030     {
6031       if (dump_file && (dump_flags & TDF_DETAILS))
6032         {
6033           fprintf (dump_file, "Improved to:\n");
6034           iv_ca_dump (data, dump_file, set);
6035         }
6036     }
6037
6038   return set;
6039 }
6040
6041 static struct iv_ca *
6042 find_optimal_iv_set (struct ivopts_data *data)
6043 {
6044   unsigned i;
6045   struct iv_ca *set, *origset;
6046   struct iv_use *use;
6047   comp_cost cost, origcost;
6048
6049   /* Determine the cost based on a strategy that starts with original IVs,
6050      and try again using a strategy that prefers candidates not based
6051      on any IVs.  */
6052   origset = find_optimal_iv_set_1 (data, true);
6053   set = find_optimal_iv_set_1 (data, false);
6054
6055   if (!origset && !set)
6056     return NULL;
6057
6058   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6059   cost = set ? iv_ca_cost (set) : infinite_cost;
6060
6061   if (dump_file && (dump_flags & TDF_DETAILS))
6062     {
6063       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6064                origcost.cost, origcost.complexity);
6065       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6066                cost.cost, cost.complexity);
6067     }
6068
6069   /* Choose the one with the best cost.  */
6070   if (compare_costs (origcost, cost) <= 0)
6071     {
6072       if (set)
6073         iv_ca_free (&set);
6074       set = origset;
6075     }
6076   else if (origset)
6077     iv_ca_free (&origset);
6078
6079   for (i = 0; i < n_iv_uses (data); i++)
6080     {
6081       use = iv_use (data, i);
6082       use->selected = iv_ca_cand_for_use (set, use)->cand;
6083     }
6084
6085   return set;
6086 }
6087
6088 /* Creates a new induction variable corresponding to CAND.  */
6089
6090 static void
6091 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6092 {
6093   gimple_stmt_iterator incr_pos;
6094   tree base;
6095   bool after = false;
6096
6097   if (!cand->iv)
6098     return;
6099
6100   switch (cand->pos)
6101     {
6102     case IP_NORMAL:
6103       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6104       break;
6105
6106     case IP_END:
6107       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6108       after = true;
6109       break;
6110
6111     case IP_AFTER_USE:
6112       after = true;
6113       /* fall through */
6114     case IP_BEFORE_USE:
6115       incr_pos = gsi_for_stmt (cand->incremented_at);
6116       break;
6117
6118     case IP_ORIGINAL:
6119       /* Mark that the iv is preserved.  */
6120       name_info (data, cand->var_before)->preserve_biv = true;
6121       name_info (data, cand->var_after)->preserve_biv = true;
6122
6123       /* Rewrite the increment so that it uses var_before directly.  */
6124       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6125       return;
6126     }
6127
6128   gimple_add_tmp_var (cand->var_before);
6129
6130   base = unshare_expr (cand->iv->base);
6131
6132   create_iv (base, unshare_expr (cand->iv->step),
6133              cand->var_before, data->current_loop,
6134              &incr_pos, after, &cand->var_before, &cand->var_after);
6135 }
6136
6137 /* Creates new induction variables described in SET.  */
6138
6139 static void
6140 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6141 {
6142   unsigned i;
6143   struct iv_cand *cand;
6144   bitmap_iterator bi;
6145
6146   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6147     {
6148       cand = iv_cand (data, i);
6149       create_new_iv (data, cand);
6150     }
6151
6152   if (dump_file && (dump_flags & TDF_DETAILS))
6153     {
6154       fprintf (dump_file, "\nSelected IV set: \n");
6155       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6156         {
6157           cand = iv_cand (data, i);
6158           dump_cand (dump_file, cand);
6159         }
6160       fprintf (dump_file, "\n");
6161     }
6162 }
6163
6164 /* Rewrites USE (definition of iv used in a nonlinear expression)
6165    using candidate CAND.  */
6166
6167 static void
6168 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6169                             struct iv_use *use, struct iv_cand *cand)
6170 {
6171   tree comp;
6172   tree op, tgt;
6173   gimple ass;
6174   gimple_stmt_iterator bsi;
6175
6176   /* An important special case -- if we are asked to express value of
6177      the original iv by itself, just exit; there is no need to
6178      introduce a new computation (that might also need casting the
6179      variable to unsigned and back).  */
6180   if (cand->pos == IP_ORIGINAL
6181       && cand->incremented_at == use->stmt)
6182     {
6183       enum tree_code stmt_code;
6184
6185       gcc_assert (is_gimple_assign (use->stmt));
6186       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6187
6188       /* Check whether we may leave the computation unchanged.
6189          This is the case only if it does not rely on other
6190          computations in the loop -- otherwise, the computation
6191          we rely upon may be removed in remove_unused_ivs,
6192          thus leading to ICE.  */
6193       stmt_code = gimple_assign_rhs_code (use->stmt);
6194       if (stmt_code == PLUS_EXPR
6195           || stmt_code == MINUS_EXPR
6196           || stmt_code == POINTER_PLUS_EXPR)
6197         {
6198           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6199             op = gimple_assign_rhs2 (use->stmt);
6200           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6201             op = gimple_assign_rhs1 (use->stmt);
6202           else
6203             op = NULL_TREE;
6204         }
6205       else
6206         op = NULL_TREE;
6207
6208       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6209         return;
6210     }
6211
6212   comp = get_computation (data->current_loop, use, cand);
6213   gcc_assert (comp != NULL_TREE);
6214
6215   switch (gimple_code (use->stmt))
6216     {
6217     case GIMPLE_PHI:
6218       tgt = PHI_RESULT (use->stmt);
6219
6220       /* If we should keep the biv, do not replace it.  */
6221       if (name_info (data, tgt)->preserve_biv)
6222         return;
6223
6224       bsi = gsi_after_labels (gimple_bb (use->stmt));
6225       break;
6226
6227     case GIMPLE_ASSIGN:
6228       tgt = gimple_assign_lhs (use->stmt);
6229       bsi = gsi_for_stmt (use->stmt);
6230       break;
6231
6232     default:
6233       gcc_unreachable ();
6234     }
6235
6236   if (!valid_gimple_rhs_p (comp)
6237       || (gimple_code (use->stmt) != GIMPLE_PHI
6238           /* We can't allow re-allocating the stmt as it might be pointed
6239              to still.  */
6240           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6241               >= gimple_num_ops (gsi_stmt (bsi)))))
6242     {
6243       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6244                                        true, GSI_SAME_STMT);
6245       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6246         {
6247           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6248           /* As this isn't a plain copy we have to reset alignment
6249              information.  */
6250           if (SSA_NAME_PTR_INFO (comp))
6251             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6252         }
6253     }
6254
6255   if (gimple_code (use->stmt) == GIMPLE_PHI)
6256     {
6257       ass = gimple_build_assign (tgt, comp);
6258       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6259
6260       bsi = gsi_for_stmt (use->stmt);
6261       remove_phi_node (&bsi, false);
6262     }
6263   else
6264     {
6265       gimple_assign_set_rhs_from_tree (&bsi, comp);
6266       use->stmt = gsi_stmt (bsi);
6267     }
6268 }
6269
6270 /* Performs a peephole optimization to reorder the iv update statement with
6271    a mem ref to enable instruction combining in later phases. The mem ref uses
6272    the iv value before the update, so the reordering transformation requires
6273    adjustment of the offset. CAND is the selected IV_CAND.
6274
6275    Example:
6276
6277    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6278    iv2 = iv1 + 1;
6279
6280    if (t < val)      (1)
6281      goto L;
6282    goto Head;
6283
6284
6285    directly propagating t over to (1) will introduce overlapping live range
6286    thus increase register pressure. This peephole transform it into:
6287
6288
6289    iv2 = iv1 + 1;
6290    t = MEM_REF (base, iv2, 8, 8);
6291    if (t < val)
6292      goto L;
6293    goto Head;
6294 */
6295
6296 static void
6297 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6298 {
6299   tree var_after;
6300   gimple iv_update, stmt;
6301   basic_block bb;
6302   gimple_stmt_iterator gsi, gsi_iv;
6303
6304   if (cand->pos != IP_NORMAL)
6305     return;
6306
6307   var_after = cand->var_after;
6308   iv_update = SSA_NAME_DEF_STMT (var_after);
6309
6310   bb = gimple_bb (iv_update);
6311   gsi = gsi_last_nondebug_bb (bb);
6312   stmt = gsi_stmt (gsi);
6313
6314   /* Only handle conditional statement for now.  */
6315   if (gimple_code (stmt) != GIMPLE_COND)
6316     return;
6317
6318   gsi_prev_nondebug (&gsi);
6319   stmt = gsi_stmt (gsi);
6320   if (stmt != iv_update)
6321     return;
6322
6323   gsi_prev_nondebug (&gsi);
6324   if (gsi_end_p (gsi))
6325     return;
6326
6327   stmt = gsi_stmt (gsi);
6328   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6329     return;
6330
6331   if (stmt != use->stmt)
6332     return;
6333
6334   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6335     return;
6336
6337   if (dump_file && (dump_flags & TDF_DETAILS))
6338     {
6339       fprintf (dump_file, "Reordering \n");
6340       print_gimple_stmt (dump_file, iv_update, 0, 0);
6341       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6342       fprintf (dump_file, "\n");
6343     }
6344
6345   gsi = gsi_for_stmt (use->stmt);
6346   gsi_iv = gsi_for_stmt (iv_update);
6347   gsi_move_before (&gsi_iv, &gsi);
6348
6349   cand->pos = IP_BEFORE_USE;
6350   cand->incremented_at = use->stmt;
6351 }
6352
6353 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6354
6355 static void
6356 rewrite_use_address (struct ivopts_data *data,
6357                      struct iv_use *use, struct iv_cand *cand)
6358 {
6359   aff_tree aff;
6360   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6361   tree base_hint = NULL_TREE;
6362   tree ref, iv;
6363   bool ok;
6364
6365   adjust_iv_update_pos (cand, use);
6366   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6367   gcc_assert (ok);
6368   unshare_aff_combination (&aff);
6369
6370   /* To avoid undefined overflow problems, all IV candidates use unsigned
6371      integer types.  The drawback is that this makes it impossible for
6372      create_mem_ref to distinguish an IV that is based on a memory object
6373      from one that represents simply an offset.
6374
6375      To work around this problem, we pass a hint to create_mem_ref that
6376      indicates which variable (if any) in aff is an IV based on a memory
6377      object.  Note that we only consider the candidate.  If this is not
6378      based on an object, the base of the reference is in some subexpression
6379      of the use -- but these will use pointer types, so they are recognized
6380      by the create_mem_ref heuristics anyway.  */
6381   if (cand->iv->base_object)
6382     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6383
6384   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6385   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6386                         reference_alias_ptr_type (*use->op_p),
6387                         iv, base_hint, data->speed);
6388   copy_ref_info (ref, *use->op_p);
6389   *use->op_p = ref;
6390 }
6391
6392 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6393    candidate CAND.  */
6394
6395 static void
6396 rewrite_use_compare (struct ivopts_data *data,
6397                      struct iv_use *use, struct iv_cand *cand)
6398 {
6399   tree comp, *var_p, op, bound;
6400   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6401   enum tree_code compare;
6402   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6403   bool ok;
6404
6405   bound = cp->value;
6406   if (bound)
6407     {
6408       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6409       tree var_type = TREE_TYPE (var);
6410       gimple_seq stmts;
6411
6412       if (dump_file && (dump_flags & TDF_DETAILS))
6413         {
6414           fprintf (dump_file, "Replacing exit test: ");
6415           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6416         }
6417       compare = cp->comp;
6418       bound = unshare_expr (fold_convert (var_type, bound));
6419       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6420       if (stmts)
6421         gsi_insert_seq_on_edge_immediate (
6422                 loop_preheader_edge (data->current_loop),
6423                 stmts);
6424
6425       gimple_cond_set_lhs (use->stmt, var);
6426       gimple_cond_set_code (use->stmt, compare);
6427       gimple_cond_set_rhs (use->stmt, op);
6428       return;
6429     }
6430
6431   /* The induction variable elimination failed; just express the original
6432      giv.  */
6433   comp = get_computation (data->current_loop, use, cand);
6434   gcc_assert (comp != NULL_TREE);
6435
6436   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6437   gcc_assert (ok);
6438
6439   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6440                                      true, GSI_SAME_STMT);
6441 }
6442
6443 /* Rewrites USE using candidate CAND.  */
6444
6445 static void
6446 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6447 {
6448   switch (use->type)
6449     {
6450       case USE_NONLINEAR_EXPR:
6451         rewrite_use_nonlinear_expr (data, use, cand);
6452         break;
6453
6454       case USE_ADDRESS:
6455         rewrite_use_address (data, use, cand);
6456         break;
6457
6458       case USE_COMPARE:
6459         rewrite_use_compare (data, use, cand);
6460         break;
6461
6462       default:
6463         gcc_unreachable ();
6464     }
6465
6466   update_stmt (use->stmt);
6467 }
6468
6469 /* Rewrite the uses using the selected induction variables.  */
6470
6471 static void
6472 rewrite_uses (struct ivopts_data *data)
6473 {
6474   unsigned i;
6475   struct iv_cand *cand;
6476   struct iv_use *use;
6477
6478   for (i = 0; i < n_iv_uses (data); i++)
6479     {
6480       use = iv_use (data, i);
6481       cand = use->selected;
6482       gcc_assert (cand);
6483
6484       rewrite_use (data, use, cand);
6485     }
6486 }
6487
6488 /* Removes the ivs that are not used after rewriting.  */
6489
6490 static void
6491 remove_unused_ivs (struct ivopts_data *data)
6492 {
6493   unsigned j;
6494   bitmap_iterator bi;
6495   bitmap toremove = BITMAP_ALLOC (NULL);
6496
6497   /* Figure out an order in which to release SSA DEFs so that we don't
6498      release something that we'd have to propagate into a debug stmt
6499      afterwards.  */
6500   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6501     {
6502       struct version_info *info;
6503
6504       info = ver_info (data, j);
6505       if (info->iv
6506           && !integer_zerop (info->iv->step)
6507           && !info->inv_id
6508           && !info->iv->have_use_for
6509           && !info->preserve_biv)
6510         {
6511           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6512
6513           tree def = info->iv->ssa_name;
6514
6515           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6516             {
6517               imm_use_iterator imm_iter;
6518               use_operand_p use_p;
6519               gimple stmt;
6520               int count = 0;
6521
6522               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6523                 {
6524                   if (!gimple_debug_bind_p (stmt))
6525                     continue;
6526
6527                   /* We just want to determine whether to do nothing
6528                      (count == 0), to substitute the computed
6529                      expression into a single use of the SSA DEF by
6530                      itself (count == 1), or to use a debug temp
6531                      because the SSA DEF is used multiple times or as
6532                      part of a larger expression (count > 1). */
6533                   count++;
6534                   if (gimple_debug_bind_get_value (stmt) != def)
6535                     count++;
6536
6537                   if (count > 1)
6538                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6539                 }
6540
6541               if (!count)
6542                 continue;
6543
6544               struct iv_use dummy_use;
6545               struct iv_cand *best_cand = NULL, *cand;
6546               unsigned i, best_pref = 0, cand_pref;
6547
6548               memset (&dummy_use, 0, sizeof (dummy_use));
6549               dummy_use.iv = info->iv;
6550               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6551                 {
6552                   cand = iv_use (data, i)->selected;
6553                   if (cand == best_cand)
6554                     continue;
6555                   cand_pref = operand_equal_p (cand->iv->step,
6556                                                info->iv->step, 0)
6557                     ? 4 : 0;
6558                   cand_pref
6559                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6560                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6561                     ? 2 : 0;
6562                   cand_pref
6563                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6564                     ? 1 : 0;
6565                   if (best_cand == NULL || best_pref < cand_pref)
6566                     {
6567                       best_cand = cand;
6568                       best_pref = cand_pref;
6569                     }
6570                 }
6571
6572               if (!best_cand)
6573                 continue;
6574
6575               tree comp = get_computation_at (data->current_loop,
6576                                               &dummy_use, best_cand,
6577                                               SSA_NAME_DEF_STMT (def));
6578               if (!comp)
6579                 continue;
6580
6581               if (count > 1)
6582                 {
6583                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6584                   DECL_ARTIFICIAL (vexpr) = 1;
6585                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6586                   if (SSA_NAME_VAR (def))
6587                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6588                   else
6589                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6590                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6591                   gimple_stmt_iterator gsi;
6592
6593                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6594                     gsi = gsi_after_labels (gimple_bb
6595                                             (SSA_NAME_DEF_STMT (def)));
6596                   else
6597                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6598
6599                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6600                   comp = vexpr;
6601                 }
6602
6603               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6604                 {
6605                   if (!gimple_debug_bind_p (stmt))
6606                     continue;
6607
6608                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6609                     SET_USE (use_p, comp);
6610
6611                   update_stmt (stmt);
6612                 }
6613             }
6614         }
6615     }
6616
6617   release_defs_bitset (toremove);
6618
6619   BITMAP_FREE (toremove);
6620 }
6621
6622 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6623    for pointer_map_traverse.  */
6624
6625 static bool
6626 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6627                       void *data ATTRIBUTE_UNUSED)
6628 {
6629   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6630
6631   free (niter);
6632   return true;
6633 }
6634
6635 /* Frees data allocated by the optimization of a single loop.  */
6636
6637 static void
6638 free_loop_data (struct ivopts_data *data)
6639 {
6640   unsigned i, j;
6641   bitmap_iterator bi;
6642   tree obj;
6643
6644   if (data->niters)
6645     {
6646       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6647       pointer_map_destroy (data->niters);
6648       data->niters = NULL;
6649     }
6650
6651   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6652     {
6653       struct version_info *info;
6654
6655       info = ver_info (data, i);
6656       free (info->iv);
6657       info->iv = NULL;
6658       info->has_nonlin_use = false;
6659       info->preserve_biv = false;
6660       info->inv_id = 0;
6661     }
6662   bitmap_clear (data->relevant);
6663   bitmap_clear (data->important_candidates);
6664
6665   for (i = 0; i < n_iv_uses (data); i++)
6666     {
6667       struct iv_use *use = iv_use (data, i);
6668
6669       free (use->iv);
6670       BITMAP_FREE (use->related_cands);
6671       for (j = 0; j < use->n_map_members; j++)
6672         if (use->cost_map[j].depends_on)
6673           BITMAP_FREE (use->cost_map[j].depends_on);
6674       free (use->cost_map);
6675       free (use);
6676     }
6677   data->iv_uses.truncate (0);
6678
6679   for (i = 0; i < n_iv_cands (data); i++)
6680     {
6681       struct iv_cand *cand = iv_cand (data, i);
6682
6683       free (cand->iv);
6684       if (cand->depends_on)
6685         BITMAP_FREE (cand->depends_on);
6686       free (cand);
6687     }
6688   data->iv_candidates.truncate (0);
6689
6690   if (data->version_info_size < num_ssa_names)
6691     {
6692       data->version_info_size = 2 * num_ssa_names;
6693       free (data->version_info);
6694       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6695     }
6696
6697   data->max_inv_id = 0;
6698
6699   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6700     SET_DECL_RTL (obj, NULL_RTX);
6701
6702   decl_rtl_to_reset.truncate (0);
6703
6704   data->inv_expr_tab.empty ();
6705   data->inv_expr_id = 0;
6706 }
6707
6708 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6709    loop tree.  */
6710
6711 static void
6712 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6713 {
6714   free_loop_data (data);
6715   free (data->version_info);
6716   BITMAP_FREE (data->relevant);
6717   BITMAP_FREE (data->important_candidates);
6718
6719   decl_rtl_to_reset.release ();
6720   data->iv_uses.release ();
6721   data->iv_candidates.release ();
6722   data->inv_expr_tab.dispose ();
6723 }
6724
6725 /* Returns true if the loop body BODY includes any function calls.  */
6726
6727 static bool
6728 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6729 {
6730   gimple_stmt_iterator gsi;
6731   unsigned i;
6732
6733   for (i = 0; i < num_nodes; i++)
6734     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6735       {
6736         gimple stmt = gsi_stmt (gsi);
6737         if (is_gimple_call (stmt)
6738             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6739           return true;
6740       }
6741   return false;
6742 }
6743
6744 /* Optimizes the LOOP.  Returns true if anything changed.  */
6745
6746 static bool
6747 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6748 {
6749   bool changed = false;
6750   struct iv_ca *iv_ca;
6751   edge exit = single_dom_exit (loop);
6752   basic_block *body;
6753
6754   gcc_assert (!data->niters);
6755   data->current_loop = loop;
6756   data->speed = optimize_loop_for_speed_p (loop);
6757
6758   if (dump_file && (dump_flags & TDF_DETAILS))
6759     {
6760       fprintf (dump_file, "Processing loop %d\n", loop->num);
6761
6762       if (exit)
6763         {
6764           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6765                    exit->src->index, exit->dest->index);
6766           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6767           fprintf (dump_file, "\n");
6768         }
6769
6770       fprintf (dump_file, "\n");
6771     }
6772
6773   body = get_loop_body (loop);
6774   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6775   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6776   free (body);
6777
6778   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6779
6780   /* For each ssa name determines whether it behaves as an induction variable
6781      in some loop.  */
6782   if (!find_induction_variables (data))
6783     goto finish;
6784
6785   /* Finds interesting uses (item 1).  */
6786   find_interesting_uses (data);
6787   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6788     goto finish;
6789
6790   /* Finds candidates for the induction variables (item 2).  */
6791   find_iv_candidates (data);
6792
6793   /* Calculates the costs (item 3, part 1).  */
6794   determine_iv_costs (data);
6795   determine_use_iv_costs (data);
6796   determine_set_costs (data);
6797
6798   /* Find the optimal set of induction variables (item 3, part 2).  */
6799   iv_ca = find_optimal_iv_set (data);
6800   if (!iv_ca)
6801     goto finish;
6802   changed = true;
6803
6804   /* Create the new induction variables (item 4, part 1).  */
6805   create_new_ivs (data, iv_ca);
6806   iv_ca_free (&iv_ca);
6807
6808   /* Rewrite the uses (item 4, part 2).  */
6809   rewrite_uses (data);
6810
6811   /* Remove the ivs that are unused after rewriting.  */
6812   remove_unused_ivs (data);
6813
6814   /* We have changed the structure of induction variables; it might happen
6815      that definitions in the scev database refer to some of them that were
6816      eliminated.  */
6817   scev_reset ();
6818
6819 finish:
6820   free_loop_data (data);
6821
6822   return changed;
6823 }
6824
6825 /* Main entry point.  Optimizes induction variables in loops.  */
6826
6827 void
6828 tree_ssa_iv_optimize (void)
6829 {
6830   struct loop *loop;
6831   struct ivopts_data data;
6832   loop_iterator li;
6833
6834   tree_ssa_iv_optimize_init (&data);
6835
6836   /* Optimize the loops starting with the innermost ones.  */
6837   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6838     {
6839       if (dump_file && (dump_flags & TDF_DETAILS))
6840         flow_loop_dump (loop, dump_file, NULL, 1);
6841
6842       tree_ssa_iv_optimize_loop (&data, loop);
6843     }
6844
6845   tree_ssa_iv_optimize_finalize (&data);
6846 }