/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction variables
      and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible: it might give better performance and it might enable us to
   decide costs more precisely, but getting all the interactions right
   would be complicated.  */
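
/* As an illustrative sketch (a hypothetical input and output, not taken
   from the compiler), the classical strength-reduction case rewrites an
   indexed store

     for (i = 0; i < n; i++)
       a[i] = 0;

   into a form based on a pointer induction variable

     for (p = a; p < a + n; p++)
       *p = 0;

   when the cost model finds the pointer form cheaper on the target.  */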
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

static hashval_t mbc_entry_hash (const void *);
static int mbc_entry_eq (const void *, const void *);

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Cached costs for multiplies by constants, and a flag to indicate
   when they're valid.  */
static htab_t mult_costs[2];
static bool cost_tables_exist = false;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}
/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Allocate data structures for the cost model.  */

void
initialize_costs (void)
{
  mult_costs[0] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  mult_costs[1] = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
  cost_tables_exist = true;
}

/* Release data structures for the cost model.  */

void
finalize_costs (void)
{
  cost_tables_exist = false;
  htab_delete (mult_costs[0]);
  htab_delete (mult_costs[1]);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);

  initialize_costs ();
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
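
/* For instance, for EXPR = &a[i] (an ADDR_EXPR whose base address is the
   declaration of a) the function returns &a, while for an integer constant
   or any expression of non-pointer type it returns NULL_TREE.  */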
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, in case it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
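
/* For instance, for an access a[i] where a has 4-byte elements and i is an
   induction variable with step 1, the index is replaced by the initial
   value of i and 1 * 4 = 4 bytes are accumulated into DTA->step.  */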
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
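
/* For example, TOP = i * 4 with BOT = i gives *MUL = 4 (through the
   MULT_EXPR case and the operand_equal_p base case), and TOP = 12 with
   BOT = 4 gives *MUL = 3 (through the INTEGER_CST case); TOP = 13 with
   BOT = 4 fails because the division leaves a remainder.  */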
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
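
/* For example, stripping EXPR = p + 10 yields p with *OFFSET = 10, and
   stripping EXPR = &a[4] (with 4-byte elements of a) yields &a with
   *OFFSET = 16, the array reference having been stripped completely.  */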
2192 /* Returns a variant of TYPE that can be used as a base for different uses.
2193 We return an unsigned type with the same precision, which avoids problems
2194 with overflows. */
2196 static tree
2197 generic_type_for (tree type)
2199 if (POINTER_TYPE_P (type))
2200 return unsigned_type_for (type);
2202 if (TYPE_UNSIGNED (type))
2203 return type;
2205 return unsigned_type_for (type);
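/* For example, on a typical ILP32 target both 'int' and 'int *' map to
   'unsigned int' here, while an already unsigned type is returned
   unchanged (illustrative; the exact type depends on the target).  */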
2208 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2209 the bitmap in which we should store them. */
2211 static struct ivopts_data *fd_ivopts_data;
2212 static tree
2213 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2215 bitmap *depends_on = (bitmap *) data;
2216 struct version_info *info;
2218 if (TREE_CODE (*expr_p) != SSA_NAME)
2219 return NULL_TREE;
2220 info = name_info (fd_ivopts_data, *expr_p);
2222 if (!info->inv_id || info->has_nonlin_use)
2223 return NULL_TREE;
2225 if (!*depends_on)
2226 *depends_on = BITMAP_ALLOC (NULL);
2227 bitmap_set_bit (*depends_on, info->inv_id);
2229 return NULL_TREE;
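/* The bitmap built here records the inv_ids of the loop invariants the
   expression mentions; the cost model later charges candidates for
   keeping these invariants live in registers.  */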
2232 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2233 position to POS. If USE is not NULL, the candidate is set as related to
2234 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2235 replacement of the final value of the iv by a direct computation. */
2237 static struct iv_cand *
2238 add_candidate_1 (struct ivopts_data *data,
2239 tree base, tree step, bool important, enum iv_position pos,
2240 struct iv_use *use, gimple incremented_at)
2242 unsigned i;
2243 struct iv_cand *cand = NULL;
2244 tree type, orig_type;
2246 /* For non-original variables, make sure their values are computed in a type
2247 that does not invoke undefined behavior on overflows (since in general,
2248 we cannot prove that these induction variables are non-wrapping). */
2249 if (pos != IP_ORIGINAL)
2251 orig_type = TREE_TYPE (base);
2252 type = generic_type_for (orig_type);
2253 if (type != orig_type)
2255 base = fold_convert (type, base);
2256 step = fold_convert (type, step);
2260 for (i = 0; i < n_iv_cands (data); i++)
2262 cand = iv_cand (data, i);
2264 if (cand->pos != pos)
2265 continue;
2267 if (cand->incremented_at != incremented_at
2268 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2269 && cand->ainc_use != use))
2270 continue;
2272 if (!cand->iv)
2274 if (!base && !step)
2275 break;
2277 continue;
2280 if (!base && !step)
2281 continue;
2283 if (operand_equal_p (base, cand->iv->base, 0)
2284 && operand_equal_p (step, cand->iv->step, 0)
2285 && (TYPE_PRECISION (TREE_TYPE (base))
2286 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2287 break;
2290 if (i == n_iv_cands (data))
2292 cand = XCNEW (struct iv_cand);
2293 cand->id = i;
2295 if (!base && !step)
2296 cand->iv = NULL;
2297 else
2298 cand->iv = alloc_iv (base, step);
2300 cand->pos = pos;
2301 if (pos != IP_ORIGINAL && cand->iv)
2303 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2304 cand->var_after = cand->var_before;
2306 cand->important = important;
2307 cand->incremented_at = incremented_at;
2308 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2310 if (step
2311 && TREE_CODE (step) != INTEGER_CST)
2313 fd_ivopts_data = data;
2314 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2317 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2318 cand->ainc_use = use;
2319 else
2320 cand->ainc_use = NULL;
2322 if (dump_file && (dump_flags & TDF_DETAILS))
2323 dump_cand (dump_file, cand);
2326 if (important && !cand->important)
2328 cand->important = true;
2329 if (dump_file && (dump_flags & TDF_DETAILS))
2330 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2333 if (use)
2335 bitmap_set_bit (use->related_cands, i);
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 fprintf (dump_file, "Candidate %d is related to use %d\n",
2338 cand->id, use->id);
2341 return cand;
2344 /* Returns true if incrementing the induction variable at the end of the LOOP
2345 is allowed.
2347 The purpose is to avoid splitting the latch edge with a biv increment, thus
2348 creating a jump, possibly confusing other optimization passes and leaving
2349 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2350 is not available (so we do not have a better alternative), or if the latch
2351 edge is already nonempty. */
2353 static bool
2354 allow_ip_end_pos_p (struct loop *loop)
2356 if (!ip_normal_pos (loop))
2357 return true;
2359 if (!empty_block_p (ip_end_pos (loop)))
2360 return true;
2362 return false;
2365 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2366 Important field is set to IMPORTANT. */
2368 static void
2369 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2370 bool important, struct iv_use *use)
2372 basic_block use_bb = gimple_bb (use->stmt);
2373 enum machine_mode mem_mode;
2374 unsigned HOST_WIDE_INT cstepi;
2376 /* If we insert the increment in any position other than the standard
2377 ones, we must ensure that it is incremented exactly once per iteration.
2378 It must not be placed in an inner nested loop, nor on one side of an
2379 if statement. */
2380 if (use_bb->loop_father != data->current_loop
2381 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2382 || stmt_could_throw_p (use->stmt)
2383 || !cst_and_fits_in_hwi (step))
2384 return;
2386 cstepi = int_cst_value (step);
2388 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2389 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2390 || USE_STORE_PRE_INCREMENT (mem_mode))
2391 && GET_MODE_SIZE (mem_mode) == cstepi)
2392 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2393 || USE_STORE_PRE_DECREMENT (mem_mode))
2394 && GET_MODE_SIZE (mem_mode) == -cstepi))
2396 enum tree_code code = MINUS_EXPR;
2397 tree new_base;
2398 tree new_step = step;
2400 if (POINTER_TYPE_P (TREE_TYPE (base)))
2402 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2403 code = POINTER_PLUS_EXPR;
2405 else
2406 new_step = fold_convert (TREE_TYPE (base), new_step);
2407 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2408 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2409 use->stmt);
2411 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2412 || USE_STORE_POST_INCREMENT (mem_mode))
2413 && GET_MODE_SIZE (mem_mode) == cstepi)
2414 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2415 || USE_STORE_POST_DECREMENT (mem_mode))
2416 && GET_MODE_SIZE (mem_mode) == -cstepi))
2418 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2419 use->stmt);
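/* Illustrative case: a memory access through a pointer that advances by
   exactly the access size each iteration matches the IP_AFTER_USE
   candidate above, so the address can become a post-increment.  */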
2423 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2424 position to POS. If USE is not NULL, the candidate is set as related to
2425 it. The candidate computation is scheduled on all available positions. */
2427 static void
2428 add_candidate (struct ivopts_data *data,
2429 tree base, tree step, bool important, struct iv_use *use)
2431 if (ip_normal_pos (data->current_loop))
2432 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2433 if (ip_end_pos (data->current_loop)
2434 && allow_ip_end_pos_p (data->current_loop))
2435 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2437 if (use != NULL && use->type == USE_ADDRESS)
2438 add_autoinc_candidates (data, base, step, important, use);
2441 /* Adds standard iv candidates. */
2443 static void
2444 add_standard_iv_candidates (struct ivopts_data *data)
2446 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2448 /* The same with 'long', if it is wider than 'int' and still fast enough. */
2449 if (TYPE_PRECISION
2450 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2451 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2452 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2453 build_int_cst (long_integer_type_node, 1), true, NULL);
2455 /* And again with 'long long', if it is wider than 'long' and still fast enough. */
2456 if (TYPE_PRECISION
2457 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2458 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2459 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2460 build_int_cst (long_long_integer_type_node, 1), true, NULL);
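/* The standard candidates are thus the canonical counters 0, 1, 2, ...
   in 'int', 'long' and 'long long', each added only if strictly wider
   than the previous type and at most one machine word.  */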
2464 /* Adds candidates based on the old induction variable IV. */
2466 static void
2467 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2469 gimple phi;
2470 tree def;
2471 struct iv_cand *cand;
2473 add_candidate (data, iv->base, iv->step, true, NULL);
2475 /* The same, but with initial value zero. */
2476 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2477 add_candidate (data, size_int (0), iv->step, true, NULL);
2478 else
2479 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2480 iv->step, true, NULL);
2482 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2483 if (gimple_code (phi) == GIMPLE_PHI)
2485 /* Additionally record the possibility of leaving the original iv
2486 untouched. */
2487 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2488 cand = add_candidate_1 (data,
2489 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2490 SSA_NAME_DEF_STMT (def));
2491 cand->var_before = iv->ssa_name;
2492 cand->var_after = def;
2496 /* Adds candidates based on the old induction variables. */
2498 static void
2499 add_old_ivs_candidates (struct ivopts_data *data)
2501 unsigned i;
2502 struct iv *iv;
2503 bitmap_iterator bi;
2505 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2507 iv = ver_info (data, i)->iv;
2508 if (iv && iv->biv_p && !integer_zerop (iv->step))
2509 add_old_iv_candidates (data, iv);
2513 /* Adds candidates based on the value of the induction variable IV and USE. */
2515 static void
2516 add_iv_value_candidates (struct ivopts_data *data,
2517 struct iv *iv, struct iv_use *use)
2519 unsigned HOST_WIDE_INT offset;
2520 tree base;
2521 tree basetype;
2523 add_candidate (data, iv->base, iv->step, false, use);
2525 /* The same, but with initial value zero. Make such a variable important,
2526 since it is generic enough that many uses may possibly be based
2527 on it. */
2528 basetype = TREE_TYPE (iv->base);
2529 if (POINTER_TYPE_P (basetype))
2530 basetype = sizetype;
2531 add_candidate (data, build_int_cst (basetype, 0),
2532 iv->step, true, use);
2534 /* Third, try removing the constant offset. Make sure to even
2535 add a candidate for &a[0] vs. (T *)&a. */
2536 base = strip_offset (iv->base, &offset);
2537 if (offset
2538 || base != iv->base)
2539 add_candidate (data, base, iv->step, false, use);
2542 /* Adds candidates based on the uses. */
2544 static void
2545 add_derived_ivs_candidates (struct ivopts_data *data)
2547 unsigned i;
2549 for (i = 0; i < n_iv_uses (data); i++)
2551 struct iv_use *use = iv_use (data, i);
2553 if (!use)
2554 continue;
2556 switch (use->type)
2558 case USE_NONLINEAR_EXPR:
2559 case USE_COMPARE:
2560 case USE_ADDRESS:
2561 /* Just add the ivs based on the value of the iv used here. */
2562 add_iv_value_candidates (data, use->iv, use);
2563 break;
2565 default:
2566 gcc_unreachable ();
2571 /* Record important candidates and add them to related_cands bitmaps
2572 if needed. */
2574 static void
2575 record_important_candidates (struct ivopts_data *data)
2577 unsigned i;
2578 struct iv_use *use;
2580 for (i = 0; i < n_iv_cands (data); i++)
2582 struct iv_cand *cand = iv_cand (data, i);
2584 if (cand->important)
2585 bitmap_set_bit (data->important_candidates, i);
2588 data->consider_all_candidates = (n_iv_cands (data)
2589 <= CONSIDER_ALL_CANDIDATES_BOUND);
2591 if (data->consider_all_candidates)
2593 /* We will not need "related_cands" bitmaps in this case,
2594 so release them to decrease peak memory consumption. */
2595 for (i = 0; i < n_iv_uses (data); i++)
2597 use = iv_use (data, i);
2598 BITMAP_FREE (use->related_cands);
2601 else
2603 /* Add important candidates to the related_cands bitmaps. */
2604 for (i = 0; i < n_iv_uses (data); i++)
2605 bitmap_ior_into (iv_use (data, i)->related_cands,
2606 data->important_candidates);
2610 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2611 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2612 we allocate a small open-addressed hash table for every use. */
2614 static void
2615 alloc_use_cost_map (struct ivopts_data *data)
2617 unsigned i, size, s, j;
2619 for (i = 0; i < n_iv_uses (data); i++)
2621 struct iv_use *use = iv_use (data, i);
2622 bitmap_iterator bi;
2624 if (data->consider_all_candidates)
2625 size = n_iv_cands (data);
2626 else
2628 s = 0;
2629 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2631 s++;
2634 /* Round up to a power of two, so that computing the modulus by it is fast. */
2635 for (size = 1; size < s; size <<= 1)
2636 continue;
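/* E.g. five related candidates yield a table of size 8.  */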
2639 use->n_map_members = size;
2640 use->cost_map = XCNEWVEC (struct cost_pair, size);
2644 /* Returns description of computation cost of expression whose runtime
2645 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2647 static comp_cost
2648 new_cost (unsigned runtime, unsigned complexity)
2650 comp_cost cost;
2652 cost.cost = runtime;
2653 cost.complexity = complexity;
2655 return cost;
2658 /* Adds costs COST1 and COST2. */
2660 static comp_cost
2661 add_costs (comp_cost cost1, comp_cost cost2)
2663 cost1.cost += cost2.cost;
2664 cost1.complexity += cost2.complexity;
2666 return cost1;
2668 /* Subtracts costs COST1 and COST2. */
2670 static comp_cost
2671 sub_costs (comp_cost cost1, comp_cost cost2)
2673 cost1.cost -= cost2.cost;
2674 cost1.complexity -= cost2.complexity;
2676 return cost1;
2679 /* Returns a negative number if COST1 < COST2, a positive number if
2680 COST1 > COST2, and 0 if COST1 = COST2. */
2682 static int
2683 compare_costs (comp_cost cost1, comp_cost cost2)
2685 if (cost1.cost == cost2.cost)
2686 return cost1.complexity - cost2.complexity;
2688 return cost1.cost - cost2.cost;
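/* I.e. costs compare lexicographically: the runtime cost decides first,
   and complexity only breaks ties.  */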
2691 /* Returns true if COST is infinite. */
2693 static bool
2694 infinite_cost_p (comp_cost cost)
2696 return cost.cost == INFTY;
2699 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2700 depends on the invariants DEPENDS_ON, that the value used in expressing it
2701 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2703 static void
2704 set_use_iv_cost (struct ivopts_data *data,
2705 struct iv_use *use, struct iv_cand *cand,
2706 comp_cost cost, bitmap depends_on, tree value,
2707 enum tree_code comp, int inv_expr_id)
2709 unsigned i, s;
2711 if (infinite_cost_p (cost))
2713 BITMAP_FREE (depends_on);
2714 return;
2717 if (data->consider_all_candidates)
2719 use->cost_map[cand->id].cand = cand;
2720 use->cost_map[cand->id].cost = cost;
2721 use->cost_map[cand->id].depends_on = depends_on;
2722 use->cost_map[cand->id].value = value;
2723 use->cost_map[cand->id].comp = comp;
2724 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2725 return;
2728 /* n_map_members is a power of two, so this computes modulo. */
2729 s = cand->id & (use->n_map_members - 1);
2730 for (i = s; i < use->n_map_members; i++)
2731 if (!use->cost_map[i].cand)
2732 goto found;
2733 for (i = 0; i < s; i++)
2734 if (!use->cost_map[i].cand)
2735 goto found;
2737 gcc_unreachable ();
2739 found:
2740 use->cost_map[i].cand = cand;
2741 use->cost_map[i].cost = cost;
2742 use->cost_map[i].depends_on = depends_on;
2743 use->cost_map[i].value = value;
2744 use->cost_map[i].comp = comp;
2745 use->cost_map[i].inv_expr_id = inv_expr_id;
2748 /* Gets cost of (USE, CANDIDATE) pair. */
2750 static struct cost_pair *
2751 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2752 struct iv_cand *cand)
2754 unsigned i, s;
2755 struct cost_pair *ret;
2757 if (!cand)
2758 return NULL;
2760 if (data->consider_all_candidates)
2762 ret = use->cost_map + cand->id;
2763 if (!ret->cand)
2764 return NULL;
2766 return ret;
2769 /* n_map_members is a power of two, so this computes modulo. */
2770 s = cand->id & (use->n_map_members - 1);
2771 for (i = s; i < use->n_map_members; i++)
2772 if (use->cost_map[i].cand == cand)
2773 return use->cost_map + i;
2775 for (i = 0; i < s; i++)
2776 if (use->cost_map[i].cand == cand)
2777 return use->cost_map + i;
2779 return NULL;
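/* The probe order (starting at CAND->id & mask, wrapping around once)
   mirrors the insertion order in set_use_iv_cost, so a recorded pair is
   always found.  */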
2782 /* Returns estimate on cost of computing SEQ. */
2784 static unsigned
2785 seq_cost (rtx seq, bool speed)
2787 unsigned cost = 0;
2788 rtx set;
2790 for (; seq; seq = NEXT_INSN (seq))
2792 set = single_set (seq);
2793 if (set)
2794 cost += set_src_cost (SET_SRC (set), speed);
2795 else
2796 cost++;
2799 return cost;
2802 /* Produces DECL_RTL for object OBJ so that it looks as if it is stored in memory. */
2803 static rtx
2804 produce_memory_decl_rtl (tree obj, int *regno)
2806 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2807 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2808 rtx x;
2810 gcc_assert (obj);
2811 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2813 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2814 x = gen_rtx_SYMBOL_REF (address_mode, name);
2815 SET_SYMBOL_REF_DECL (x, obj);
2816 x = gen_rtx_MEM (DECL_MODE (obj), x);
2817 set_mem_addr_space (x, as);
2818 targetm.encode_section_info (obj, x, true);
2820 else
2822 x = gen_raw_REG (address_mode, (*regno)++);
2823 x = gen_rtx_MEM (DECL_MODE (obj), x);
2824 set_mem_addr_space (x, as);
2827 return x;
2830 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2831 walk_tree. DATA contains the actual fake register number. */
2833 static tree
2834 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2836 tree obj = NULL_TREE;
2837 rtx x = NULL_RTX;
2838 int *regno = (int *) data;
2840 switch (TREE_CODE (*expr_p))
2842 case ADDR_EXPR:
2843 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2844 handled_component_p (*expr_p);
2845 expr_p = &TREE_OPERAND (*expr_p, 0))
2846 continue;
2847 obj = *expr_p;
2848 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2849 x = produce_memory_decl_rtl (obj, regno);
2850 break;
2852 case SSA_NAME:
2853 *ws = 0;
2854 obj = SSA_NAME_VAR (*expr_p);
2855 if (!DECL_RTL_SET_P (obj))
2856 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2857 break;
2859 case VAR_DECL:
2860 case PARM_DECL:
2861 case RESULT_DECL:
2862 *ws = 0;
2863 obj = *expr_p;
2865 if (DECL_RTL_SET_P (obj))
2866 break;
2868 if (DECL_MODE (obj) == BLKmode)
2869 x = produce_memory_decl_rtl (obj, regno);
2870 else
2871 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2873 break;
2875 default:
2876 break;
2879 if (x)
2881 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2882 SET_DECL_RTL (obj, x);
2885 return NULL_TREE;
2888 /* Determines cost of the computation of EXPR. */
2890 static unsigned
2891 computation_cost (tree expr, bool speed)
2893 rtx seq, rslt;
2894 tree type = TREE_TYPE (expr);
2895 unsigned cost;
2896 /* Avoid using hard regs in ways which may be unsupported. */
2897 int regno = LAST_VIRTUAL_REGISTER + 1;
2898 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2899 enum node_frequency real_frequency = node->frequency;
2901 node->frequency = NODE_FREQUENCY_NORMAL;
2902 crtl->maybe_hot_insn_p = speed;
2903 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2904 start_sequence ();
2905 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2906 seq = get_insns ();
2907 end_sequence ();
2908 default_rtl_profile ();
2909 node->frequency = real_frequency;
2911 cost = seq_cost (seq, speed);
2912 if (MEM_P (rslt))
2913 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2914 TYPE_ADDR_SPACE (type), speed);
2915 else if (!REG_P (rslt))
2916 cost += set_src_cost (rslt, speed);
2918 return cost;
2921 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2923 static tree
2924 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2926 if (stmt_after_increment (loop, cand, stmt))
2927 return cand->var_after;
2928 else
2929 return cand->var_before;
2932 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2933 same precision, at least as wide as the precision of TYPE, stores
2934 BA to A and BB to B, and returns the type of BA. Otherwise, leaves A and B
2935 unchanged and returns the type of A. */
2937 static tree
2938 determine_common_wider_type (tree *a, tree *b)
2940 tree wider_type = NULL;
2941 tree suba, subb;
2942 tree atype = TREE_TYPE (*a);
2944 if (CONVERT_EXPR_P (*a))
2946 suba = TREE_OPERAND (*a, 0);
2947 wider_type = TREE_TYPE (suba);
2948 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2949 return atype;
2951 else
2952 return atype;
2954 if (CONVERT_EXPR_P (*b))
2956 subb = TREE_OPERAND (*b, 0);
2957 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2958 return atype;
2960 else
2961 return atype;
2963 *a = suba;
2964 *b = subb;
2965 return wider_type;
2968 /* Determines the expression by which USE is expressed from induction variable
2969 CAND at statement AT in LOOP. The expression is stored in a decomposed
2970 form into AFF. Returns false if USE cannot be expressed using CAND. */
2972 static bool
2973 get_computation_aff (struct loop *loop,
2974 struct iv_use *use, struct iv_cand *cand, gimple at,
2975 struct affine_tree_combination *aff)
2977 tree ubase = use->iv->base;
2978 tree ustep = use->iv->step;
2979 tree cbase = cand->iv->base;
2980 tree cstep = cand->iv->step, cstep_common;
2981 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2982 tree common_type, var;
2983 tree uutype;
2984 aff_tree cbase_aff, var_aff;
2985 double_int rat;
2987 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2989 /* We do not have enough precision to express the values of the use. */
2990 return false;
2993 var = var_at_stmt (loop, cand, at);
2994 uutype = unsigned_type_for (utype);
2996 /* If the conversion is not a no-op, perform it. */
2997 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2999 cstep = fold_convert (uutype, cstep);
3000 cbase = fold_convert (uutype, cbase);
3001 var = fold_convert (uutype, var);
3004 if (!constant_multiple_of (ustep, cstep, &rat))
3005 return false;
3007 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3008 type, we achieve better folding by computing their difference in this
3009 wider type, and casting the result to UUTYPE. We do not need to worry about
3010 overflows, as all the arithmetic will in the end be performed in UUTYPE
3011 anyway. */
3012 common_type = determine_common_wider_type (&ubase, &cbase);
3014 /* use = ubase - ratio * cbase + ratio * var. */
3015 tree_to_aff_combination (ubase, common_type, aff);
3016 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3017 tree_to_aff_combination (var, uutype, &var_aff);
3019 /* We need to shift the value if we are after the increment. */
3020 if (stmt_after_increment (loop, cand, at))
3022 aff_tree cstep_aff;
3024 if (common_type != uutype)
3025 cstep_common = fold_convert (common_type, cstep);
3026 else
3027 cstep_common = cstep;
3029 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3030 aff_combination_add (&cbase_aff, &cstep_aff);
3033 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3034 aff_combination_add (aff, &cbase_aff);
3035 if (common_type != uutype)
3036 aff_combination_convert (aff, uutype);
3038 aff_combination_scale (&var_aff, rat);
3039 aff_combination_add (aff, &var_aff);
3041 return true;
3044 /* Determines the expression by which USE is expressed from induction variable
3045 CAND at statement AT in LOOP. The computation is unshared. */
3047 static tree
3048 get_computation_at (struct loop *loop,
3049 struct iv_use *use, struct iv_cand *cand, gimple at)
3051 aff_tree aff;
3052 tree type = TREE_TYPE (use->iv->base);
3054 if (!get_computation_aff (loop, use, cand, at, &aff))
3055 return NULL_TREE;
3056 unshare_aff_combination (&aff);
3057 return fold_convert (type, aff_combination_to_tree (&aff));
3060 /* Determines the expression by which USE is expressed from induction variable
3061 CAND in LOOP. The computation is unshared. */
3063 static tree
3064 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3066 return get_computation_at (loop, use, cand, use->stmt);
3069 /* Adjust the cost COST for being in the loop setup rather than the loop body.
3070 If we're optimizing for space, the loop setup overhead is constant;
3071 if we're optimizing for speed, amortize it over the average number of iterations. */
3072 static unsigned
3073 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3075 if (cost == INFTY)
3076 return cost;
3077 else if (optimize_loop_for_speed_p (data->current_loop))
3078 return cost / avg_loop_niter (data->current_loop);
3079 else
3080 return cost;
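/* E.g. a setup cost of 20 in a loop averaging 4 iterations contributes 5
   per iteration when optimizing for speed, but stays 20 for size.  */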
3083 /* Returns cost of addition in MODE. */
3085 unsigned
3086 add_regs_cost (enum machine_mode mode, bool speed)
3088 static unsigned costs[NUM_MACHINE_MODES][2];
3089 rtx seq;
3090 unsigned cost;
3092 if (costs[mode][speed])
3093 return costs[mode][speed];
3095 start_sequence ();
3096 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3097 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3098 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3099 NULL_RTX);
3100 seq = get_insns ();
3101 end_sequence ();
3103 cost = seq_cost (seq, speed);
3104 if (!cost)
3105 cost = 1;
3107 costs[mode][speed] = cost;
3109 if (dump_file && (dump_flags & TDF_DETAILS))
3110 fprintf (dump_file, "Addition in %s costs %d\n",
3111 GET_MODE_NAME (mode), cost);
3112 return cost;
3115 /* Returns cost of multiplication in MODE. */
3117 unsigned
3118 multiply_regs_cost (enum machine_mode mode, bool speed)
3120 static unsigned costs[NUM_MACHINE_MODES][2];
3121 rtx seq;
3122 unsigned cost;
3124 if (costs[mode][speed])
3125 return costs[mode][speed];
3127 start_sequence ();
3128 force_operand (gen_rtx_fmt_ee (MULT, mode,
3129 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3130 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3131 NULL_RTX);
3132 seq = get_insns ();
3133 end_sequence ();
3135 cost = seq_cost (seq, speed);
3136 if (!cost)
3137 cost = 1;
3139 costs[mode][speed] = cost;
3141 if (dump_file && (dump_flags & TDF_DETAILS))
3142 fprintf (dump_file, "Multiplication in %s costs %d\n",
3143 GET_MODE_NAME (mode), cost);
3144 return cost;
3147 /* Returns cost of addition with a constant in MODE. */
3149 unsigned
3150 add_const_cost (enum machine_mode mode, bool speed)
3152 static unsigned costs[NUM_MACHINE_MODES][2];
3153 rtx seq;
3154 unsigned cost;
3156 if (costs[mode][speed])
3157 return costs[mode][speed];
3159 /* Arbitrarily generate insns for x + 2, as the exact constant
3160 shouldn't matter. */
3161 start_sequence ();
3162 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3163 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3164 gen_int_mode (2, mode)),
3165 NULL_RTX);
3166 seq = get_insns ();
3167 end_sequence ();
3169 cost = seq_cost (seq, speed);
3170 if (!cost)
3171 cost = 1;
3173 costs[mode][speed] = cost;
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, "Addition to constant in %s costs %d\n",
3177 GET_MODE_NAME (mode), cost);
3178 return cost;
3181 /* Returns cost of extend or truncate in MODE. */
3183 unsigned
3184 extend_or_trunc_reg_cost (tree type_to, tree type_from, bool speed)
3186 static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES][2];
3187 rtx seq;
3188 unsigned cost;
3189 enum machine_mode mode_to = TYPE_MODE (type_to);
3190 enum machine_mode mode_from = TYPE_MODE (type_from);
3191 tree size_to = TYPE_SIZE (type_to);
3192 tree size_from = TYPE_SIZE (type_from);
3193 enum rtx_code code;
3195 gcc_assert (TREE_CODE (size_to) == INTEGER_CST
3196 && TREE_CODE (size_from) == INTEGER_CST);
3198 if (costs[mode_to][mode_from][speed])
3199 return costs[mode_to][mode_from][speed];
3201 if (tree_int_cst_lt (size_to, size_from))
3202 code = TRUNCATE;
3203 else if (TYPE_UNSIGNED (type_to))
3204 code = ZERO_EXTEND;
3205 else
3206 code = SIGN_EXTEND;
3208 start_sequence ();
3209 gen_rtx_fmt_e (code, mode_to,
3210 gen_raw_REG (mode_from, LAST_VIRTUAL_REGISTER + 1));
3211 seq = get_insns ();
3212 end_sequence ();
3214 cost = seq_cost (seq, speed);
3215 if (!cost)
3216 cost = 1;
3218 if (dump_file && (dump_flags & TDF_DETAILS))
3219 fprintf (dump_file, "Conversion from %s to %s costs %d\n",
3220 GET_MODE_NAME (mode_to), GET_MODE_NAME (mode_from), cost);
3222 costs[mode_to][mode_from][speed] = cost;
3223 return cost;
3226 /* Returns cost of negation in MODE. */
3228 unsigned
3229 negate_reg_cost (enum machine_mode mode, bool speed)
3231 static unsigned costs[NUM_MACHINE_MODES][2];
3232 rtx seq;
3233 unsigned cost;
3235 if (costs[mode][speed])
3236 return costs[mode][speed];
3238 start_sequence ();
3239 force_operand (gen_rtx_fmt_e (NEG, mode,
3240 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
3241 NULL_RTX);
3242 seq = get_insns ();
3243 end_sequence ();
3245 cost = seq_cost (seq, speed);
3246 if (!cost)
3247 cost = 1;
3249 costs[mode][speed] = cost;
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, "Negation in %s costs %d\n",
3253 GET_MODE_NAME (mode), cost);
3254 return cost;
3257 /* Entry in a hashtable of already known costs for multiplication. */
3258 struct mbc_entry
3260 HOST_WIDE_INT cst; /* The constant to multiply by. */
3261 enum machine_mode mode; /* In mode. */
3262 unsigned cost; /* The cost. */
3265 /* Computes the hash value for ENTRY. */
3267 static hashval_t
3268 mbc_entry_hash (const void *entry)
3270 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3272 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3275 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3277 static int
3278 mbc_entry_eq (const void *entry1, const void *entry2)
3280 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3281 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3283 return (e1->mode == e2->mode
3284 && e1->cst == e2->cst);
3287 /* Returns cost of multiplication by constant CST in MODE. */
3289 unsigned
3290 multiply_by_const_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3292 struct mbc_entry **cached, act;
3293 rtx seq;
3294 unsigned cost;
3296 gcc_assert (cost_tables_exist);
3298 act.mode = mode;
3299 act.cst = cst;
3300 cached = (struct mbc_entry **)
3301 htab_find_slot (mult_costs[speed], &act, INSERT);
3303 if (*cached)
3304 return (*cached)->cost;
3306 *cached = XNEW (struct mbc_entry);
3307 (*cached)->mode = mode;
3308 (*cached)->cst = cst;
3310 start_sequence ();
3311 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3312 gen_int_mode (cst, mode), NULL_RTX, 0);
3313 seq = get_insns ();
3314 end_sequence ();
3316 cost = seq_cost (seq, speed);
3318 if (dump_file && (dump_flags & TDF_DETAILS))
3319 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3320 (int) cst, GET_MODE_NAME (mode), cost);
3322 (*cached)->cost = cost;
3324 return cost;
3327 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3328 validity for a memory reference accessing memory of mode MODE in
3329 address space AS. */
3331 DEF_VEC_P (sbitmap);
3332 DEF_VEC_ALLOC_P (sbitmap, heap);
3334 bool
3335 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3336 addr_space_t as)
3338 #define MAX_RATIO 128
3339 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3340 static VEC (sbitmap, heap) *valid_mult_list;
3341 sbitmap valid_mult;
3343 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3344 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3346 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3347 if (!valid_mult)
3349 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3350 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351 rtx addr;
3352 HOST_WIDE_INT i;
3354 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3355 sbitmap_zero (valid_mult);
3356 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3357 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3359 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3360 if (memory_address_addr_space_p (mode, addr, as))
3361 SET_BIT (valid_mult, i + MAX_RATIO);
3364 if (dump_file && (dump_flags & TDF_DETAILS))
3366 fprintf (dump_file, " allowed multipliers:");
3367 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3368 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3369 fprintf (dump_file, " %d", (int) i);
3370 fprintf (dump_file, "\n");
3371 fprintf (dump_file, "\n");
3374 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3377 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3378 return false;
3380 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
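/* For instance, on i386-like targets only the ratios 1, 2, 4 and 8 are
   valid index scales (illustrative; the probing above computes the exact
   per-target set for each mode and address space).  */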
3383 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3384 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3385 variable is omitted. Compute the cost for a memory reference that accesses
3386 a memory location of mode MEM_MODE in address space AS.
3388 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3389 size of MEM_MODE / RATIO) is available. To make this determination, we
3390 look at the size of the increment to be made, which is given in CSTEP.
3391 CSTEP may be zero if the step is unknown.
3392 STMT_AFTER_INC is true iff the statement we're looking at is after the
3393 increment of the original biv.
3395 TODO -- there must be some better way. This all is quite crude. */
3397 typedef struct
3399 HOST_WIDE_INT min_offset, max_offset;
3400 unsigned costs[2][2][2][2];
3401 } *address_cost_data;
3403 DEF_VEC_P (address_cost_data);
3404 DEF_VEC_ALLOC_P (address_cost_data, heap);
3406 static comp_cost
3407 get_address_cost (bool symbol_present, bool var_present,
3408 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3409 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3410 addr_space_t as, bool speed,
3411 bool stmt_after_inc, bool *may_autoinc)
3413 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3414 static VEC(address_cost_data, heap) *address_cost_data_list;
3415 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3416 address_cost_data data;
3417 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3418 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3419 unsigned cost, acost, complexity;
3420 bool offset_p, ratio_p, autoinc;
3421 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3422 unsigned HOST_WIDE_INT mask;
3423 unsigned bits;
3425 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3426 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3427 data_index + 1);
3429 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3430 if (!data)
3432 HOST_WIDE_INT i;
3433 HOST_WIDE_INT rat, off = 0;
3434 int old_cse_not_expected, width;
3435 unsigned sym_p, var_p, off_p, rat_p, add_c;
3436 rtx seq, addr, base;
3437 rtx reg0, reg1;
3439 data = (address_cost_data) xcalloc (1, sizeof (*data));
3441 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3443 width = GET_MODE_BITSIZE (address_mode) - 1;
3444 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3445 width = HOST_BITS_PER_WIDE_INT - 1;
3446 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3448 for (i = width; i >= 0; i--)
3450 off = -((HOST_WIDE_INT) 1 << i);
3451 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3452 if (memory_address_addr_space_p (mem_mode, addr, as))
3453 break;
3455 data->min_offset = (i == -1 ? 0 : off);
3457 for (i = width; i >= 0; i--)
3459 off = ((HOST_WIDE_INT) 1 << i) - 1;
3460 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3461 if (memory_address_addr_space_p (mem_mode, addr, as))
3462 break;
3464 if (i == -1)
3465 off = 0;
3466 data->max_offset = off;
3468 if (dump_file && (dump_flags & TDF_DETAILS))
3470 fprintf (dump_file, "get_address_cost:\n");
3471 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3472 GET_MODE_NAME (mem_mode),
3473 data->min_offset);
3474 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3475 GET_MODE_NAME (mem_mode),
3476 data->max_offset);
3479 rat = 1;
3480 for (i = 2; i <= MAX_RATIO; i++)
3481 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3483 rat = i;
3484 break;
3487 /* Compute the cost of various addressing modes. */
3488 acost = 0;
3489 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3490 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3492 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3493 || USE_STORE_PRE_DECREMENT (mem_mode))
3495 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3496 has_predec[mem_mode]
3497 = memory_address_addr_space_p (mem_mode, addr, as);
3499 if (USE_LOAD_POST_DECREMENT (mem_mode)
3500 || USE_STORE_POST_DECREMENT (mem_mode))
3502 addr = gen_rtx_POST_DEC (address_mode, reg0);
3503 has_postdec[mem_mode]
3504 = memory_address_addr_space_p (mem_mode, addr, as);
3506 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3507 || USE_STORE_PRE_INCREMENT (mem_mode))
3509 addr = gen_rtx_PRE_INC (address_mode, reg0);
3510 has_preinc[mem_mode]
3511 = memory_address_addr_space_p (mem_mode, addr, as);
3513 if (USE_LOAD_POST_INCREMENT (mem_mode)
3514 || USE_STORE_POST_INCREMENT (mem_mode))
3516 addr = gen_rtx_POST_INC (address_mode, reg0);
3517 has_postinc[mem_mode]
3518 = memory_address_addr_space_p (mem_mode, addr, as);
3520 for (i = 0; i < 16; i++)
3522 sym_p = i & 1;
3523 var_p = (i >> 1) & 1;
3524 off_p = (i >> 2) & 1;
3525 rat_p = (i >> 3) & 1;
3527 addr = reg0;
3528 if (rat_p)
3529 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3530 gen_int_mode (rat, address_mode));
3532 if (var_p)
3533 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3535 if (sym_p)
3537 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3538 /* ??? We can run into trouble with some backends by presenting
3539 it with symbols which haven't been properly passed through
3540 targetm.encode_section_info. By setting the local bit, we
3541 enhance the probability of things working. */
3542 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3544 if (off_p)
3545 base = gen_rtx_fmt_e (CONST, address_mode,
3546 gen_rtx_fmt_ee
3547 (PLUS, address_mode, base,
3548 gen_int_mode (off, address_mode)));
3550 else if (off_p)
3551 base = gen_int_mode (off, address_mode);
3552 else
3553 base = NULL_RTX;
3555 if (base)
3556 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3558 start_sequence ();
3559 /* To avoid splitting addressing modes, pretend that no cse will
3560 follow. */
3561 old_cse_not_expected = cse_not_expected;
3562 cse_not_expected = true;
3563 addr = memory_address_addr_space (mem_mode, addr, as);
3564 cse_not_expected = old_cse_not_expected;
3565 seq = get_insns ();
3566 end_sequence ();
3568 acost = seq_cost (seq, speed);
3569 acost += address_cost (addr, mem_mode, as, speed);
3571 if (!acost)
3572 acost = 1;
3573 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3576 /* On some targets, it is quite expensive to load a symbol into a register,
3577 which makes addresses that contain symbols look much more expensive.
3578 However, the symbol will have to be loaded in any case before the
3579 loop (and quite likely we have it in a register already), so it does not
3580 make much sense to penalize them too heavily. So make some final
3581 tweaks for the SYMBOL_PRESENT modes:
3583 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3584 a var is cheaper, use this mode with a small penalty.
3585 If VAR_PRESENT is true, try whether the mode with
3586 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3587 if this is the case, use it. */
3588 add_c = add_regs_cost (address_mode, speed);
3589 for (i = 0; i < 8; i++)
3591 var_p = i & 1;
3592 off_p = (i >> 1) & 1;
3593 rat_p = (i >> 2) & 1;
3595 acost = data->costs[0][1][off_p][rat_p] + 1;
3596 if (var_p)
3597 acost += add_c;
3599 if (acost < data->costs[1][var_p][off_p][rat_p])
3600 data->costs[1][var_p][off_p][rat_p] = acost;
3603 if (dump_file && (dump_flags & TDF_DETAILS))
3605 fprintf (dump_file, "Address costs:\n");
3607 for (i = 0; i < 16; i++)
3609 sym_p = i & 1;
3610 var_p = (i >> 1) & 1;
3611 off_p = (i >> 2) & 1;
3612 rat_p = (i >> 3) & 1;
3614 fprintf (dump_file, " ");
3615 if (sym_p)
3616 fprintf (dump_file, "sym + ");
3617 if (var_p)
3618 fprintf (dump_file, "var + ");
3619 if (off_p)
3620 fprintf (dump_file, "cst + ");
3621 if (rat_p)
3622 fprintf (dump_file, "rat * ");
3624 acost = data->costs[sym_p][var_p][off_p][rat_p];
3625 fprintf (dump_file, "index costs %d\n", acost);
3627 if (has_predec[mem_mode] || has_postdec[mem_mode]
3628 || has_preinc[mem_mode] || has_postinc[mem_mode])
3629 fprintf (dump_file, " May include autoinc/dec\n");
3630 fprintf (dump_file, "\n");
3633 VEC_replace (address_cost_data, address_cost_data_list,
3634 data_index, data);
3637 bits = GET_MODE_BITSIZE (address_mode);
3638 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3639 offset &= mask;
3640 if ((offset >> (bits - 1) & 1))
3641 offset |= ~mask;
3642 s_offset = offset;
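/* The two steps above reduce OFFSET modulo 2^BITS and sign-extend it from
   bit BITS - 1, so it is interpreted as a signed address-mode offset.  */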
3644 autoinc = false;
3645 msize = GET_MODE_SIZE (mem_mode);
3646 autoinc_offset = offset;
3647 if (stmt_after_inc)
3648 autoinc_offset += ratio * cstep;
3649 if (symbol_present || var_present || ratio != 1)
3650 autoinc = false;
3651 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3652 && msize == cstep)
3653 || (has_postdec[mem_mode] && autoinc_offset == 0
3654 && msize == -cstep)
3655 || (has_preinc[mem_mode] && autoinc_offset == msize
3656 && msize == cstep)
3657 || (has_predec[mem_mode] && autoinc_offset == -msize
3658 && msize == -cstep))
3659 autoinc = true;
3661 cost = 0;
3662 offset_p = (s_offset != 0
3663 && data->min_offset <= s_offset
3664 && s_offset <= data->max_offset);
3665 ratio_p = (ratio != 1
3666 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3668 if (ratio != 1 && !ratio_p)
3669 cost += multiply_by_const_cost (ratio, address_mode, speed);
3671 if (s_offset && !offset_p && !symbol_present)
3672 cost += add_regs_cost (address_mode, speed);
3674 if (may_autoinc)
3675 *may_autoinc = autoinc;
3676 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3677 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3678 return new_cost (cost + acost, complexity);
3681 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3682 EXPR operand holding the multiplication that becomes the shift. COST0 and
3683 COST1 are the costs for calculating the operands of EXPR. Returns true if
3684 successful, and returns the cost in COST. */
3686 static bool
3687 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3688 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3690 comp_cost res;
3691 tree op1 = TREE_OPERAND (expr, 1);
3692 tree cst = TREE_OPERAND (mult, 1);
3693 tree multop = TREE_OPERAND (mult, 0);
3694 int m = exact_log2 (int_cst_value (cst));
3695 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3696 int sa_cost;
3698 if (!(m >= 0 && m < maxm))
3699 return false;
3701 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3702 ? shiftadd_cost[speed][mode][m]
3703 : (mult == op1
3704 ? shiftsub1_cost[speed][mode][m]
3705 : shiftsub0_cost[speed][mode][m]));
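/* For a MINUS_EXPR, shiftsub1 is the cost of x - (y << m), i.e. the
   shifted multiply is the subtrahend, while shiftsub0 is the cost of
   (y << m) - x.  */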
3706 res = new_cost (sa_cost, 0);
3707 res = add_costs (res, mult == op1 ? cost0 : cost1);
3709 STRIP_NOPS (multop);
3710 if (!is_gimple_val (multop))
3711 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3713 *cost = res;
3714 return true;
3717 /* Estimates cost of forcing expression EXPR into a variable. */
3719 static comp_cost
3720 force_expr_to_var_cost (tree expr, bool speed)
3722 static bool costs_initialized = false;
3723 static unsigned integer_cost [2];
3724 static unsigned symbol_cost [2];
3725 static unsigned address_cost [2];
3726 tree op0, op1;
3727 comp_cost cost0, cost1, cost;
3728 enum machine_mode mode;
3730 if (!costs_initialized)
3732 tree type = build_pointer_type (integer_type_node);
3733 tree var, addr;
3734 rtx x;
3735 int i;
3737 var = create_tmp_var_raw (integer_type_node, "test_var");
3738 TREE_STATIC (var) = 1;
3739 x = produce_memory_decl_rtl (var, NULL);
3740 SET_DECL_RTL (var, x);
3742 addr = build1 (ADDR_EXPR, type, var);
3745 for (i = 0; i < 2; i++)
3747 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3748 2000), i);
3750 symbol_cost[i] = computation_cost (addr, i) + 1;
3752 address_cost[i]
3753 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3754 if (dump_file && (dump_flags & TDF_DETAILS))
3756 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3757 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3758 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3759 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3760 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3761 fprintf (dump_file, "\n");
3765 costs_initialized = true;
3768 STRIP_NOPS (expr);
3770 if (SSA_VAR_P (expr))
3771 return no_cost;
3773 if (is_gimple_min_invariant (expr))
3775 if (TREE_CODE (expr) == INTEGER_CST)
3776 return new_cost (integer_cost [speed], 0);
3778 if (TREE_CODE (expr) == ADDR_EXPR)
3780 tree obj = TREE_OPERAND (expr, 0);
3782 if (TREE_CODE (obj) == VAR_DECL
3783 || TREE_CODE (obj) == PARM_DECL
3784 || TREE_CODE (obj) == RESULT_DECL)
3785 return new_cost (symbol_cost [speed], 0);
3788 return new_cost (address_cost [speed], 0);
3791 switch (TREE_CODE (expr))
3793 case POINTER_PLUS_EXPR:
3794 case PLUS_EXPR:
3795 case MINUS_EXPR:
3796 case MULT_EXPR:
3797 op0 = TREE_OPERAND (expr, 0);
3798 op1 = TREE_OPERAND (expr, 1);
3799 STRIP_NOPS (op0);
3800 STRIP_NOPS (op1);
3802 if (is_gimple_val (op0))
3803 cost0 = no_cost;
3804 else
3805 cost0 = force_expr_to_var_cost (op0, speed);
3807 if (is_gimple_val (op1))
3808 cost1 = no_cost;
3809 else
3810 cost1 = force_expr_to_var_cost (op1, speed);
3812 break;
3814 case NEGATE_EXPR:
3815 op0 = TREE_OPERAND (expr, 0);
3816 STRIP_NOPS (op0);
3817 op1 = NULL_TREE;
3819 if (is_gimple_val (op0))
3820 cost0 = no_cost;
3821 else
3822 cost0 = force_expr_to_var_cost (op0, speed);
3824 cost1 = no_cost;
3825 break;
3827 default:
3828 /* Just an arbitrary value, FIXME. */
3829 return new_cost (target_spill_cost[speed], 0);
3832 mode = TYPE_MODE (TREE_TYPE (expr));
3833 switch (TREE_CODE (expr))
3835 case POINTER_PLUS_EXPR:
3836 case PLUS_EXPR:
3837 case MINUS_EXPR:
3838 case NEGATE_EXPR:
3839 cost = new_cost (add_regs_cost (mode, speed), 0);
3840 if (TREE_CODE (expr) != NEGATE_EXPR)
3842 tree mult = NULL_TREE;
3843 comp_cost sa_cost;
3844 if (TREE_CODE (op1) == MULT_EXPR)
3845 mult = op1;
3846 else if (TREE_CODE (op0) == MULT_EXPR)
3847 mult = op0;
3849 if (mult != NULL_TREE
3850 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3851 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3852 &sa_cost))
3853 return sa_cost;
3855 break;
3857 case MULT_EXPR:
3858 if (cst_and_fits_in_hwi (op0))
3859 cost = new_cost (multiply_by_const_cost (int_cst_value (op0),
3860 mode, speed), 0);
3861 else if (cst_and_fits_in_hwi (op1))
3862 cost = new_cost (multiply_by_const_cost (int_cst_value (op1),
3863 mode, speed), 0);
3864 else
3865 return new_cost (target_spill_cost [speed], 0);
3866 break;
3868 default:
3869 gcc_unreachable ();
3872 cost = add_costs (cost, cost0);
3873 cost = add_costs (cost, cost1);
3875 /* Bound the cost by target_spill_cost. The parts of complicated
3876 computations often are either loop invariant or at least can
3877 be shared between several iv uses, so letting this grow without
3878 limits would not give reasonable results. */
3879 if (cost.cost > (int) target_spill_cost [speed])
3880 cost.cost = target_spill_cost [speed];
3882 return cost;
3885 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3886 invariants the computation depends on. */
3888 static comp_cost
3889 force_var_cost (struct ivopts_data *data,
3890 tree expr, bitmap *depends_on)
3892 if (depends_on)
3894 fd_ivopts_data = data;
3895 walk_tree (&expr, find_depends, depends_on, NULL);
3898 return force_expr_to_var_cost (expr, data->speed);
3901 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3902 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3903 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3904 invariants the computation depends on. */
3906 static comp_cost
3907 split_address_cost (struct ivopts_data *data,
3908 tree addr, bool *symbol_present, bool *var_present,
3909 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3911 tree core;
3912 HOST_WIDE_INT bitsize;
3913 HOST_WIDE_INT bitpos;
3914 tree toffset;
3915 enum machine_mode mode;
3916 int unsignedp, volatilep;
3918 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3919 &unsignedp, &volatilep, false);
3921 if (toffset != 0
3922 || bitpos % BITS_PER_UNIT != 0
3923 || TREE_CODE (core) != VAR_DECL)
3925 *symbol_present = false;
3926 *var_present = true;
3927 fd_ivopts_data = data;
3928 walk_tree (&addr, find_depends, depends_on, NULL);
3929 return new_cost (target_spill_cost[data->speed], 0);
3932 *offset += bitpos / BITS_PER_UNIT;
3933 if (TREE_STATIC (core)
3934 || DECL_EXTERNAL (core))
3936 *symbol_present = true;
3937 *var_present = false;
3938 return no_cost;
3941 *symbol_present = false;
3942 *var_present = true;
3943 return no_cost;
3946 /* Estimates cost of expressing difference of addresses E1 - E2 as
3947 var + symbol + offset. The value of offset is added to OFFSET,
3948 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3949 part is missing. DEPENDS_ON is a set of the invariants the computation
3950 depends on. */
3952 static comp_cost
3953 ptr_difference_cost (struct ivopts_data *data,
3954 tree e1, tree e2, bool *symbol_present, bool *var_present,
3955 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3957 HOST_WIDE_INT diff = 0;
3958 aff_tree aff_e1, aff_e2;
3959 tree type;
3961 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3963 if (ptr_difference_const (e1, e2, &diff))
3965 *offset += diff;
3966 *symbol_present = false;
3967 *var_present = false;
3968 return no_cost;
3971 if (integer_zerop (e2))
3972 return split_address_cost (data, TREE_OPERAND (e1, 0),
3973 symbol_present, var_present, offset, depends_on);
3975 *symbol_present = false;
3976 *var_present = true;
3978 type = signed_type_for (TREE_TYPE (e1));
3979 tree_to_aff_combination (e1, type, &aff_e1);
3980 tree_to_aff_combination (e2, type, &aff_e2);
3981 aff_combination_scale (&aff_e2, double_int_minus_one);
3982 aff_combination_add (&aff_e1, &aff_e2);
3984 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3987 /* Estimates cost of expressing difference E1 - E2 as
3988 var + symbol + offset. The value of offset is added to OFFSET,
3989 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3990 part is missing. DEPENDS_ON is a set of the invariants the computation
3991 depends on. */
3993 static comp_cost
3994 difference_cost (struct ivopts_data *data,
3995 tree e1, tree e2, bool *symbol_present, bool *var_present,
3996 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3998 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3999 unsigned HOST_WIDE_INT off1, off2;
4000 aff_tree aff_e1, aff_e2;
4001 tree type;
4003 e1 = strip_offset (e1, &off1);
4004 e2 = strip_offset (e2, &off2);
4005 *offset += off1 - off2;
4007 STRIP_NOPS (e1);
4008 STRIP_NOPS (e2);
4010 if (TREE_CODE (e1) == ADDR_EXPR)
4011 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4012 offset, depends_on);
4013 *symbol_present = false;
4015 if (operand_equal_p (e1, e2, 0))
4017 *var_present = false;
4018 return no_cost;
4021 *var_present = true;
4023 if (integer_zerop (e2))
4024 return force_var_cost (data, e1, depends_on);
4026 if (integer_zerop (e1))
4028 comp_cost cost = force_var_cost (data, e2, depends_on);
4029 cost.cost += multiply_by_const_cost (-1, mode, data->speed);
4030 return cost;
4033 type = signed_type_for (TREE_TYPE (e1));
4034 tree_to_aff_combination (e1, type, &aff_e1);
4035 tree_to_aff_combination (e2, type, &aff_e2);
4036 aff_combination_scale (&aff_e2, double_int_minus_one);
4037 aff_combination_add (&aff_e1, &aff_e2);
4039 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4042 /* Returns true if AFF1 and AFF2 are identical. */
4044 static bool
4045 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4047 unsigned i;
4049 if (aff1->n != aff2->n)
4050 return false;
4052 for (i = 0; i < aff1->n; i++)
4054 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
4055 return false;
4057 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4058 return false;
4060 return true;
4063 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4065 static int
4066 get_expr_id (struct ivopts_data *data, tree expr)
4068 struct iv_inv_expr_ent ent;
4069 struct iv_inv_expr_ent **slot;
4071 ent.expr = expr;
4072 ent.hash = iterative_hash_expr (expr, 0);
4073 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
4074 &ent, INSERT);
4075 if (*slot)
4076 return (*slot)->id;
4078 *slot = XNEW (struct iv_inv_expr_ent);
4079 (*slot)->expr = expr;
4080 (*slot)->hash = ent.hash;
4081 (*slot)->id = data->inv_expr_id++;
4082 return (*slot)->id;
4085 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4086 requires a new compiler-generated temporary. Returns -1 otherwise.
4087 ADDRESS_P is a flag indicating if the expression is for address
4088 computation. */
4090 static int
4091 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4092 tree cbase, HOST_WIDE_INT ratio,
4093 bool address_p)
4095 aff_tree ubase_aff, cbase_aff;
4096 tree expr, ub, cb;
4098 STRIP_NOPS (ubase);
4099 STRIP_NOPS (cbase);
4100 ub = ubase;
4101 cb = cbase;
4103 if ((TREE_CODE (ubase) == INTEGER_CST)
4104 && (TREE_CODE (cbase) == INTEGER_CST))
4105 return -1;
4107 /* Strips the constant part. */
4108 if (TREE_CODE (ubase) == PLUS_EXPR
4109 || TREE_CODE (ubase) == MINUS_EXPR
4110 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4112 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4113 ubase = TREE_OPERAND (ubase, 0);
4116 /* Strips the constant part. */
4117 if (TREE_CODE (cbase) == PLUS_EXPR
4118 || TREE_CODE (cbase) == MINUS_EXPR
4119 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4121 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4122 cbase = TREE_OPERAND (cbase, 0);
4125 if (address_p)
4127 if (((TREE_CODE (ubase) == SSA_NAME)
4128 || (TREE_CODE (ubase) == ADDR_EXPR
4129 && is_gimple_min_invariant (ubase)))
4130 && (TREE_CODE (cbase) == INTEGER_CST))
4131 return -1;
4133 if (((TREE_CODE (cbase) == SSA_NAME)
4134 || (TREE_CODE (cbase) == ADDR_EXPR
4135 && is_gimple_min_invariant (cbase)))
4136 && (TREE_CODE (ubase) == INTEGER_CST))
4137 return -1;
4140 if (ratio == 1)
4142 if (operand_equal_p (ubase, cbase, 0))
4143 return -1;
4145 if (TREE_CODE (ubase) == ADDR_EXPR
4146 && TREE_CODE (cbase) == ADDR_EXPR)
4148 tree usym, csym;
4150 usym = TREE_OPERAND (ubase, 0);
4151 csym = TREE_OPERAND (cbase, 0);
4152 if (TREE_CODE (usym) == ARRAY_REF)
4154 tree ind = TREE_OPERAND (usym, 1);
4155 if (TREE_CODE (ind) == INTEGER_CST
4156 && host_integerp (ind, 0)
4157 && TREE_INT_CST_LOW (ind) == 0)
4158 usym = TREE_OPERAND (usym, 0);
4160 if (TREE_CODE (csym) == ARRAY_REF)
4162 tree ind = TREE_OPERAND (csym, 1);
4163 if (TREE_CODE (ind) == INTEGER_CST
4164 && host_integerp (ind, 0)
4165 && TREE_INT_CST_LOW (ind) == 0)
4166 csym = TREE_OPERAND (csym, 0);
4168 if (operand_equal_p (usym, csym, 0))
4169 return -1;
4171 /* Now do a more complex comparison. */
4172 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4173 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4174 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4175 return -1;
4178 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4179 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4181 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4182 aff_combination_add (&ubase_aff, &cbase_aff);
4183 expr = aff_combination_to_tree (&ubase_aff);
4184 return get_expr_id (data, expr);
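/* Illustrative example: with RATIO == 1, UBASE == p + 8 and CBASE == p
   need no new temporary (the constant parts are stripped and the bases
   compare equal), so -1 is returned.  */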
4189 /* Determines the cost of the computation by which USE is expressed
4190 from induction variable CAND. If ADDRESS_P is true, we just need
4191 to create an address from it, otherwise we want to get it into a
4192 register. A set of invariants we depend on is stored in
4193 DEPENDS_ON. AT is the statement at which the value is computed.
4194 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4195 addressing is likely. */
4197 static comp_cost
4198 get_computation_cost_at (struct ivopts_data *data,
4199 struct iv_use *use, struct iv_cand *cand,
4200 bool address_p, bitmap *depends_on, gimple at,
4201 bool *can_autoinc,
4202 int *inv_expr_id)
4204 tree ubase = use->iv->base, ustep = use->iv->step;
4205 tree cbase, cstep;
4206 tree utype = TREE_TYPE (ubase), ctype;
4207 unsigned HOST_WIDE_INT cstepi, offset = 0;
4208 HOST_WIDE_INT ratio, aratio;
4209 bool var_present, symbol_present, stmt_is_after_inc;
4210 comp_cost cost;
4211 double_int rat;
4212 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4214 *depends_on = NULL;
4216 /* Only consider real candidates. */
4217 if (!cand->iv)
4218 return infinite_cost;
4220 cbase = cand->iv->base;
4221 cstep = cand->iv->step;
4222 ctype = TREE_TYPE (cbase);
4224 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4226 /* We do not have enough precision to express the values of the use. */
4227 return infinite_cost;
4230 if (address_p
4231 || (use->iv->base_object
4232 && cand->iv->base_object
4233 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4234 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4236 /* Do not try to express the address of an object with a computation
4237 based on the address of a different object. This may cause problems in
4238 RTL-level alias analysis (which does not expect this to happen,
4239 as it is illegal in C), and would be unlikely to be useful
4240 anyway. */
4241 if (use->iv->base_object
4242 && cand->iv->base_object
4243 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4244 return infinite_cost;
4247 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4249 /* TODO -- add direct handling of this case. */
4250 goto fallback;
4253 /* CSTEPI is removed from the offset in case the statement is after the
4254 increment. If the step is not constant, we use zero instead.
4255 This is a bit imprecise (there is the extra addition), but
4256 redundancy elimination is likely to transform the code so that
4257 it uses the value of the variable before the increment anyway,
4258 so this is not too unrealistic. */
4259 if (cst_and_fits_in_hwi (cstep))
4260 cstepi = int_cst_value (cstep);
4261 else
4262 cstepi = 0;
4264 if (!constant_multiple_of (ustep, cstep, &rat))
4265 return infinite_cost;
4267 if (double_int_fits_in_shwi_p (rat))
4268 ratio = double_int_to_shwi (rat);
4269 else
4270 return infinite_cost;
4272 STRIP_NOPS (cbase);
4273 ctype = TREE_TYPE (cbase);
4275 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4277 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4278 or ratio == 1, it is better to handle this like
4280 ubase - ratio * cbase + ratio * var
4282 (this also holds in the case ratio == -1, TODO). */
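/* For example, if cbase == 8 and ratio == 2, the use value becomes
ubase - 16 + 2 * var; the constant -16 simply folds into OFFSET below,
so no runtime computation involving cbase remains. */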
4284 if (cst_and_fits_in_hwi (cbase))
4286 offset = - ratio * int_cst_value (cbase);
4287 cost = difference_cost (data,
4288 ubase, build_int_cst (utype, 0),
4289 &symbol_present, &var_present, &offset,
4290 depends_on);
4291 cost.cost /= avg_loop_niter (data->current_loop);
4293 else if (ratio == 1)
4295 tree real_cbase = cbase;
4297 /* Check to see if any adjustment is needed. */
4298 if (cstepi == 0 && stmt_is_after_inc)
4300 aff_tree real_cbase_aff;
4301 aff_tree cstep_aff;
4303 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4304 &real_cbase_aff);
4305 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4307 aff_combination_add (&real_cbase_aff, &cstep_aff);
4308 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4311 cost = difference_cost (data,
4312 ubase, real_cbase,
4313 &symbol_present, &var_present, &offset,
4314 depends_on);
4315 cost.cost /= avg_loop_niter (data->current_loop);
4317 else if (address_p
4318 && !POINTER_TYPE_P (ctype)
4319 && multiplier_allowed_in_address_p
4320 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4321 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4323 cbase
4324 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4325 cost = difference_cost (data,
4326 ubase, cbase,
4327 &symbol_present, &var_present, &offset,
4328 depends_on);
4329 cost.cost /= avg_loop_niter (data->current_loop);
4331 else
4333 cost = force_var_cost (data, cbase, depends_on);
4334 cost = add_costs (cost,
4335 difference_cost (data,
4336 ubase, build_int_cst (utype, 0),
4337 &symbol_present, &var_present,
4338 &offset, depends_on));
4339 cost.cost /= avg_loop_niter (data->current_loop);
4340 cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
4343 if (inv_expr_id)
4345 *inv_expr_id =
4346 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4347 /* Clear depends on. */
4348 if (*inv_expr_id != -1 && depends_on && *depends_on)
4349 bitmap_clear (*depends_on);
4352 /* If we are after the increment, the value of the candidate is higher by
4353 one iteration. */
4354 if (stmt_is_after_inc)
4355 offset -= ratio * cstepi;
4357 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4358 (symbol/var1/const parts may be omitted). If we are looking for an
4359 address, find the cost of addressing this. */
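/* E.g., for an address computed as base + 4 * i + 10 from a candidate
with step 1, the symbol part is base, the constant part is 10, and
ratio * var2 accounts for the 4 * i term. */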
4360 if (address_p)
4361 return add_costs (cost,
4362 get_address_cost (symbol_present, var_present,
4363 offset, ratio, cstepi,
4364 TYPE_MODE (TREE_TYPE (utype)),
4365 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4366 speed, stmt_is_after_inc,
4367 can_autoinc));
4369 /* Otherwise estimate the costs for computing the expression. */
4370 if (!symbol_present && !var_present && !offset)
4372 if (ratio != 1)
4373 cost.cost += multiply_by_const_cost (ratio, TYPE_MODE (ctype), speed);
4374 return cost;
4377 /* Symbol + offset should be compile-time computable, so consider that
4378 they are added once to the variable, if present. */
4379 if (var_present && (symbol_present || offset))
4380 cost.cost += adjust_setup_cost (data,
4381 add_regs_cost (TYPE_MODE (ctype), speed));
4383 /* Having an offset does not affect the runtime cost if it is added to
4384 a symbol, but it increases complexity. */
4385 if (offset)
4386 cost.complexity++;
4388 cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);
4390 aratio = ratio > 0 ? ratio : -ratio;
4391 if (aratio != 1)
4392 cost.cost += multiply_by_const_cost (aratio, TYPE_MODE (ctype), speed);
4393 return cost;
4395 fallback:
4396 if (can_autoinc)
4397 *can_autoinc = false;
4400 /* Just get the expression, expand it and measure the cost. */
4401 tree comp = get_computation_at (data->current_loop, use, cand, at);
4403 if (!comp)
4404 return infinite_cost;
4406 if (address_p)
4407 comp = build_simple_mem_ref (comp);
4409 return new_cost (computation_cost (comp, speed), 0);
4413 /* Determines the cost of the computation by which USE is expressed
4414 from induction variable CAND. If ADDRESS_P is true, we just need
4415 to create an address from it, otherwise we want to get it into a
4416 register. A set of invariants we depend on is stored in
4417 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4418 autoinc addressing is likely. */
4420 static comp_cost
4421 get_computation_cost (struct ivopts_data *data,
4422 struct iv_use *use, struct iv_cand *cand,
4423 bool address_p, bitmap *depends_on,
4424 bool *can_autoinc, int *inv_expr_id)
4426 return get_computation_cost_at (data,
4427 use, cand, address_p, depends_on, use->stmt,
4428 can_autoinc, inv_expr_id);
4431 /* Determines cost of basing replacement of USE on CAND in a generic
4432 expression. */
4434 static bool
4435 determine_use_iv_cost_generic (struct ivopts_data *data,
4436 struct iv_use *use, struct iv_cand *cand)
4438 bitmap depends_on;
4439 comp_cost cost;
4440 int inv_expr_id = -1;
4442 /* The simple case first -- if we need to express value of the preserved
4443 original biv, the cost is 0. This also prevents us from counting the
4444 cost of increment twice -- once at this use and once in the cost of
4445 the candidate. */
4446 if (cand->pos == IP_ORIGINAL
4447 && cand->incremented_at == use->stmt)
4449 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4450 ERROR_MARK, -1);
4451 return true;
4454 cost = get_computation_cost (data, use, cand, false, &depends_on,
4455 NULL, &inv_expr_id);
4457 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4458 inv_expr_id);
4460 return !infinite_cost_p (cost);
4463 /* Determines cost of basing replacement of USE on CAND in an address. */
4465 static bool
4466 determine_use_iv_cost_address (struct ivopts_data *data,
4467 struct iv_use *use, struct iv_cand *cand)
4469 bitmap depends_on;
4470 bool can_autoinc;
4471 int inv_expr_id = -1;
4472 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4473 &can_autoinc, &inv_expr_id);
4475 if (cand->ainc_use == use)
4477 if (can_autoinc)
4478 cost.cost -= cand->cost_step;
4479 /* If we generated the candidate solely for exploiting autoincrement
4480 opportunities, and it turns out it can't be used, set the cost to
4481 infinity to make sure we ignore it. */
4482 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4483 cost = infinite_cost;
4485 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4486 inv_expr_id);
4488 return !infinite_cost_p (cost);
4491 /* Computes value of candidate CAND at position AT in iteration NITER, and
4492 stores it to VAL. */
4494 static void
4495 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4496 aff_tree *val)
4498 aff_tree step, delta, nit;
4499 struct iv *iv = cand->iv;
4500 tree type = TREE_TYPE (iv->base);
4501 tree steptype = type;
4502 if (POINTER_TYPE_P (type))
4503 steptype = sizetype;
4505 tree_to_aff_combination (iv->step, steptype, &step);
4506 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4507 aff_combination_convert (&nit, steptype);
4508 aff_combination_mult (&nit, &step, &delta);
4509 if (stmt_after_increment (loop, cand, at))
4510 aff_combination_add (&delta, &step);
4512 tree_to_aff_combination (iv->base, type, val);
4513 aff_combination_add (val, &delta);
4516 /* Returns the period of induction variable IV. */
4518 static tree
4519 iv_period (struct iv *iv)
4521 tree step = iv->step, period, type;
4522 tree pow2div;
4524 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4526 type = unsigned_type_for (TREE_TYPE (step));
4527 /* The period of the iv is lcm (step, type_range)/step - 1,
4528 i.e., N*type_range/step - 1. Since the type range is a power
4529 of two, N == step >> num_of_ending_zeros_binary (step),
4530 so the final result is
4532 (type_range >> num_of_ending_zeros_binary (step)) - 1
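For example, for a 32-bit unsigned type and step == 12 (two trailing
zero bits), the period is (0x100000000 >> 2) - 1 == 0x3fffffff. */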
4535 pow2div = num_ending_zeros (step);
4537 period = build_low_bits_mask (type,
4538 (TYPE_PRECISION (type)
4539 - tree_low_cst (pow2div, 1)));
4541 return period;
4544 /* Returns the comparison operator used when eliminating the iv USE. */
4546 static enum tree_code
4547 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4549 struct loop *loop = data->current_loop;
4550 basic_block ex_bb;
4551 edge exit;
4553 ex_bb = gimple_bb (use->stmt);
4554 exit = EDGE_SUCC (ex_bb, 0);
4555 if (flow_bb_inside_loop_p (loop, exit->dest))
4556 exit = EDGE_SUCC (ex_bb, 1);
4558 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
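/* Strips conversions from EXP that do not change whether the type may
wrap, so that expressions can be compared structurally across such
casts. */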
4561 static tree
4562 strip_wrap_conserving_type_conversions (tree exp)
4564 while (tree_ssa_useless_type_conversion (exp)
4565 && (nowrap_type_p (TREE_TYPE (exp))
4566 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4567 exp = TREE_OPERAND (exp, 0);
4568 return exp;
4571 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4572 check for an exact match. */
4574 static bool
4575 expr_equal_p (tree e, tree what)
4577 gimple stmt;
4578 enum tree_code code;
4580 e = strip_wrap_conserving_type_conversions (e);
4581 what = strip_wrap_conserving_type_conversions (what);
4583 code = TREE_CODE (what);
4584 if (TREE_TYPE (e) != TREE_TYPE (what))
4585 return false;
4587 if (operand_equal_p (e, what, 0))
4588 return true;
4590 if (TREE_CODE (e) != SSA_NAME)
4591 return false;
4593 stmt = SSA_NAME_DEF_STMT (e);
4594 if (gimple_code (stmt) != GIMPLE_ASSIGN
4595 || gimple_assign_rhs_code (stmt) != code)
4596 return false;
4598 switch (get_gimple_rhs_class (code))
4600 case GIMPLE_BINARY_RHS:
4601 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4602 return false;
4603 /* Fallthru. */
4605 case GIMPLE_UNARY_RHS:
4606 case GIMPLE_SINGLE_RHS:
4607 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4608 default:
4609 return false;
4613 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4614 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4615 calculation is performed in a non-wrapping type.
4617 TODO: More generally, we could test for the situation that
4618 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4619 This would require knowing the sign of OFFSET.
4621 Also, we only look for the first addition in the computation of BASE.
4622 More complex analysis would be better, but introducing it just for
4623 this optimization seems like overkill. */
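/* For example, if BASE is an SSA name defined by "base_1 = p_2 + n_3"
in a non-wrapping type and OFFSET is n_3, then BASE - OFFSET == p_2,
which provably does not overflow. */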
4625 static bool
4626 difference_cannot_overflow_p (tree base, tree offset)
4628 enum tree_code code;
4629 tree e1, e2;
4631 if (!nowrap_type_p (TREE_TYPE (base)))
4632 return false;
4634 base = expand_simple_operations (base);
4636 if (TREE_CODE (base) == SSA_NAME)
4638 gimple stmt = SSA_NAME_DEF_STMT (base);
4640 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4641 return false;
4643 code = gimple_assign_rhs_code (stmt);
4644 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4645 return false;
4647 e1 = gimple_assign_rhs1 (stmt);
4648 e2 = gimple_assign_rhs2 (stmt);
4650 else
4652 code = TREE_CODE (base);
4653 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4654 return false;
4655 e1 = TREE_OPERAND (base, 0);
4656 e2 = TREE_OPERAND (base, 1);
4659 /* TODO: deeper inspection may be necessary to prove the equality. */
4660 switch (code)
4662 case PLUS_EXPR:
4663 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4664 case POINTER_PLUS_EXPR:
4665 return expr_equal_p (e2, offset);
4667 default:
4668 return false;
4672 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4673 comparison with CAND. NITER describes the number of iterations of
4674 the loop. If successful, the comparison in COMP_P is altered accordingly.
4676 We aim to handle the following situation:
4678 sometype *base, *p;
4679 int a, b, i;
4681 i = a;
4682 p = p_0 = base + a;
4684 do
4685 {
4686 bla (*p);
4687 p++;
4688 i++;
4689 }
4690 while (i < b);
4692 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4693 We aim to optimize this to
4695 p = p_0 = base + a;
4696 do
4697 {
4698 bla (*p);
4699 p++;
4700 }
4701 while (p < p_0 - a + b);
4703 This preserves correctness, since the pointer arithmetic does not
4704 overflow. More precisely:
4706 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4707 overflow in computing it or the values of p.
4708 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4709 overflow. To prove this, we use the fact that p_0 = base + a. */
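/* For instance, with a == 2 and b == 6, the body runs for p == p_0,
p_0 + 1, p_0 + 2 and p_0 + 3 (niter == b - a - 1 == 3 latch iterations),
and the rewritten test p < p_0 + 4 fails exactly when p reaches its
final value p_0 - a + b. */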
4711 static bool
4712 iv_elimination_compare_lt (struct ivopts_data *data,
4713 struct iv_cand *cand, enum tree_code *comp_p,
4714 struct tree_niter_desc *niter)
4716 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4717 struct affine_tree_combination nit, tmpa, tmpb;
4718 enum tree_code comp;
4719 HOST_WIDE_INT step;
4721 /* We need to know that the candidate induction variable does not overflow.
4722 While more complex analysis may be used to prove this, for now just
4723 check that the variable appears in the original program and that it
4724 is computed in a type that guarantees no overflows. */
4725 cand_type = TREE_TYPE (cand->iv->base);
4726 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4727 return false;
4729 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4730 the calculation of the BOUND could overflow, making the comparison
4731 invalid. */
4732 if (!data->loop_single_exit_p)
4733 return false;
4735 /* We need to be able to decide whether candidate is increasing or decreasing
4736 in order to choose the right comparison operator. */
4737 if (!cst_and_fits_in_hwi (cand->iv->step))
4738 return false;
4739 step = int_cst_value (cand->iv->step);
4741 /* Check that the number of iterations matches the expected pattern:
4742 a + 1 > b ? 0 : b - a - 1. */
4743 mbz = niter->may_be_zero;
4744 if (TREE_CODE (mbz) == GT_EXPR)
4746 /* Handle a + 1 > b. */
4747 tree op0 = TREE_OPERAND (mbz, 0);
4748 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4750 a = TREE_OPERAND (op0, 0);
4751 b = TREE_OPERAND (mbz, 1);
4753 else
4754 return false;
4756 else if (TREE_CODE (mbz) == LT_EXPR)
4758 tree op1 = TREE_OPERAND (mbz, 1);
4760 /* Handle b < a + 1. */
4761 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4763 a = TREE_OPERAND (op1, 0);
4764 b = TREE_OPERAND (mbz, 0);
4766 else
4767 return false;
4769 else
4770 return false;
4772 /* Expected number of iterations is B - A - 1. Check that it matches
4773 the actual number, i.e., that B - A - NITER = 1. */
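/* The check is carried out on affine combinations: TMPB below ends up
holding B - A - NITER, which must reduce to the constant 1 with no
symbolic part remaining. */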
4774 tree_to_aff_combination (niter->niter, nit_type, &nit);
4775 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4776 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4777 aff_combination_scale (&nit, double_int_minus_one);
4778 aff_combination_scale (&tmpa, double_int_minus_one);
4779 aff_combination_add (&tmpb, &tmpa);
4780 aff_combination_add (&tmpb, &nit);
4781 if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4782 return false;
4784 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4785 overflow. */
4786 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4787 cand->iv->step,
4788 fold_convert (TREE_TYPE (cand->iv->step), a));
4789 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4790 return false;
4792 /* Determine the new comparison operator. */
4793 comp = step < 0 ? GT_EXPR : LT_EXPR;
4794 if (*comp_p == NE_EXPR)
4795 *comp_p = comp;
4796 else if (*comp_p == EQ_EXPR)
4797 *comp_p = invert_tree_comparison (comp, false);
4798 else
4799 gcc_unreachable ();
4801 return true;
4804 /* Check whether it is possible to express the condition in USE by comparison
4805 of candidate CAND. If so, store the value compared against in BOUND, and
4806 the comparison operator in COMP. */
4808 static bool
4809 may_eliminate_iv (struct ivopts_data *data,
4810 struct iv_use *use, struct iv_cand *cand, tree *bound,
4811 enum tree_code *comp)
4813 basic_block ex_bb;
4814 edge exit;
4815 tree period;
4816 struct loop *loop = data->current_loop;
4817 aff_tree bnd;
4818 struct tree_niter_desc *desc = NULL;
4820 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4821 return false;
4823 /* For now this works only for exits that dominate the loop latch.
4824 TODO: extend to other conditions inside loop body. */
4825 ex_bb = gimple_bb (use->stmt);
4826 if (use->stmt != last_stmt (ex_bb)
4827 || gimple_code (use->stmt) != GIMPLE_COND
4828 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4829 return false;
4831 exit = EDGE_SUCC (ex_bb, 0);
4832 if (flow_bb_inside_loop_p (loop, exit->dest))
4833 exit = EDGE_SUCC (ex_bb, 1);
4834 if (flow_bb_inside_loop_p (loop, exit->dest))
4835 return false;
4837 desc = niter_for_exit (data, exit);
4838 if (!desc)
4839 return false;
4841 /* Determine whether we can use the variable to test the exit condition.
4842 This is the case iff the period of the induction variable is greater
4843 than the number of iterations for which the exit condition is true. */
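/* E.g., an 8-bit unsigned iv with step 1 has period 255, so it can
express the exit test only if the loop is known to run at most 255
iterations; in a longer-running loop the candidate would wrap before
the bound is reached. */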
4844 period = iv_period (cand->iv);
4846 /* If the number of iterations is constant, compare against it directly. */
4847 if (TREE_CODE (desc->niter) == INTEGER_CST)
4849 /* See cand_value_at. */
4850 if (stmt_after_increment (loop, cand, use->stmt))
4852 if (!tree_int_cst_lt (desc->niter, period))
4853 return false;
4855 else
4857 if (tree_int_cst_lt (period, desc->niter))
4858 return false;
4862 /* If not, and if this is the only possible exit of the loop, see whether
4863 we can get a conservative estimate on the number of iterations of the
4864 entire loop and compare against that instead. */
4865 else
4867 double_int period_value, max_niter;
4869 max_niter = desc->max;
4870 if (stmt_after_increment (loop, cand, use->stmt))
4871 max_niter = double_int_add (max_niter, double_int_one);
4872 period_value = tree_to_double_int (period);
4873 if (double_int_ucmp (max_niter, period_value) > 0)
4875 /* See if we can take advantage of inferred loop bound information. */
4876 if (data->loop_single_exit_p)
4878 if (!max_loop_iterations (loop, &max_niter))
4879 return false;
4880 /* The loop bound is already adjusted by adding 1. */
4881 if (double_int_ucmp (max_niter, period_value) > 0)
4882 return false;
4884 else
4885 return false;
4889 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4891 *bound = aff_combination_to_tree (&bnd);
4892 *comp = iv_elimination_compare (data, use);
4894 /* It is unlikely that computing the number of iterations using division
4895 would be more profitable than keeping the original induction variable. */
4896 if (expression_expensive_p (*bound))
4897 return false;
4899 /* Sometimes it is possible to handle the situation that the number of
4900 iterations may be zero unless additional assumptions hold, by using <
4901 instead of != in the exit condition.
4903 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4904 base the exit condition on it. However, that is often too
4905 expensive. */
4906 if (!integer_zerop (desc->may_be_zero))
4907 return iv_elimination_compare_lt (data, cand, comp, desc);
4909 return true;
4912 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4913 be copied if it is used in the loop body and DATA->body_includes_call. */
4915 static int
4916 parm_decl_cost (struct ivopts_data *data, tree bound)
4918 tree sbound = bound;
4919 STRIP_NOPS (sbound);
4921 if (TREE_CODE (sbound) == SSA_NAME
4922 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4923 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4924 && data->body_includes_call)
4925 return COSTS_N_INSNS (1);
4927 return 0;
4930 /* Determines cost of basing replacement of USE on CAND in a condition. */
4932 static bool
4933 determine_use_iv_cost_condition (struct ivopts_data *data,
4934 struct iv_use *use, struct iv_cand *cand)
4936 tree bound = NULL_TREE;
4937 struct iv *cmp_iv;
4938 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4939 comp_cost elim_cost, express_cost, cost, bound_cost;
4940 bool ok;
4941 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4942 tree *control_var, *bound_cst;
4943 enum tree_code comp = ERROR_MARK;
4945 /* Only consider real candidates. */
4946 if (!cand->iv)
4948 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4949 ERROR_MARK, -1);
4950 return false;
4953 /* Try iv elimination. */
4954 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4956 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4957 if (elim_cost.cost == 0)
4958 elim_cost.cost = parm_decl_cost (data, bound);
4959 else if (TREE_CODE (bound) == INTEGER_CST)
4960 elim_cost.cost = 0;
4961 /* If we replace a loop condition 'i < n' with 'p < base + n',
4962 depends_on_elim will have 'base' and 'n' set, which implies
4963 that both 'base' and 'n' will be live during the loop. More likely,
4964 'base + n' will be loop invariant, resulting in only one live value
4965 during the loop. So in that case we clear depends_on_elim and set
4966 elim_inv_expr_id instead. */
4967 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4969 elim_inv_expr_id = get_expr_id (data, bound);
4970 bitmap_clear (depends_on_elim);
4972 /* The bound is a loop invariant, so it will be only computed
4973 once. */
4974 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4976 else
4977 elim_cost = infinite_cost;
4979 /* Try expressing the original giv. If it is compared with an invariant,
4980 note that we cannot get rid of it. */
4981 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4982 NULL, &cmp_iv);
4983 gcc_assert (ok);
4985 /* When the condition is a comparison of the candidate IV against
4986 zero, prefer this IV.
4988 TODO: The constant that we're subtracting from the cost should
4989 be target-dependent. This information should be added to the
4990 target costs for each backend. */
4991 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4992 && integer_zerop (*bound_cst)
4993 && (operand_equal_p (*control_var, cand->var_after, 0)
4994 || operand_equal_p (*control_var, cand->var_before, 0)))
4995 elim_cost.cost -= 1;
4997 express_cost = get_computation_cost (data, use, cand, false,
4998 &depends_on_express, NULL,
4999 &express_inv_expr_id);
5000 fd_ivopts_data = data;
5001 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5003 /* Count the cost of the original bound as well. */
5004 bound_cost = force_var_cost (data, *bound_cst, NULL);
5005 if (bound_cost.cost == 0)
5006 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5007 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5008 bound_cost.cost = 0;
5009 express_cost.cost += bound_cost.cost;
5011 /* Choose the better approach, preferring the eliminated IV. */
5012 if (compare_costs (elim_cost, express_cost) <= 0)
5014 cost = elim_cost;
5015 depends_on = depends_on_elim;
5016 depends_on_elim = NULL;
5017 inv_expr_id = elim_inv_expr_id;
5019 else
5021 cost = express_cost;
5022 depends_on = depends_on_express;
5023 depends_on_express = NULL;
5024 bound = NULL_TREE;
5025 comp = ERROR_MARK;
5026 inv_expr_id = express_inv_expr_id;
5029 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5031 if (depends_on_elim)
5032 BITMAP_FREE (depends_on_elim);
5033 if (depends_on_express)
5034 BITMAP_FREE (depends_on_express);
5036 return !infinite_cost_p (cost);
5039 /* Determines cost of basing replacement of USE on CAND. Returns false
5040 if USE cannot be based on CAND. */
5042 static bool
5043 determine_use_iv_cost (struct ivopts_data *data,
5044 struct iv_use *use, struct iv_cand *cand)
5046 switch (use->type)
5048 case USE_NONLINEAR_EXPR:
5049 return determine_use_iv_cost_generic (data, use, cand);
5051 case USE_ADDRESS:
5052 return determine_use_iv_cost_address (data, use, cand);
5054 case USE_COMPARE:
5055 return determine_use_iv_cost_condition (data, use, cand);
5057 default:
5058 gcc_unreachable ();
5062 /* Return true if get_computation_cost indicates that autoincrement is
5063 a possibility for the pair of USE and CAND, false otherwise. */
5065 static bool
5066 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5067 struct iv_cand *cand)
5069 bitmap depends_on;
5070 bool can_autoinc;
5071 comp_cost cost;
5073 if (use->type != USE_ADDRESS)
5074 return false;
5076 cost = get_computation_cost (data, use, cand, true, &depends_on,
5077 &can_autoinc, NULL);
5079 BITMAP_FREE (depends_on);
5081 return !infinite_cost_p (cost) && can_autoinc;
5084 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5085 use that allows autoincrement, and set their AINC_USE if possible. */
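/* For each such candidate, the closest use that does not follow the
increment within the same basic block is chosen; gimple uids provide
the ordering of statements inside the block. */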
5087 static void
5088 set_autoinc_for_original_candidates (struct ivopts_data *data)
5090 unsigned i, j;
5092 for (i = 0; i < n_iv_cands (data); i++)
5094 struct iv_cand *cand = iv_cand (data, i);
5095 struct iv_use *closest = NULL;
5096 if (cand->pos != IP_ORIGINAL)
5097 continue;
5098 for (j = 0; j < n_iv_uses (data); j++)
5100 struct iv_use *use = iv_use (data, j);
5101 unsigned uid = gimple_uid (use->stmt);
5102 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
5103 || uid > gimple_uid (cand->incremented_at))
5104 continue;
5105 if (closest == NULL || uid > gimple_uid (closest->stmt))
5106 closest = use;
5108 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
5109 continue;
5110 cand->ainc_use = closest;
5114 /* Finds the candidates for the induction variables. */
5116 static void
5117 find_iv_candidates (struct ivopts_data *data)
5119 /* Add commonly used ivs. */
5120 add_standard_iv_candidates (data);
5122 /* Add old induction variables. */
5123 add_old_ivs_candidates (data);
5125 /* Add induction variables derived from uses. */
5126 add_derived_ivs_candidates (data);
5128 set_autoinc_for_original_candidates (data);
5130 /* Record the important candidates. */
5131 record_important_candidates (data);
5134 /* Determines costs of basing the use of the iv on an iv candidate. */
5136 static void
5137 determine_use_iv_costs (struct ivopts_data *data)
5139 unsigned i, j;
5140 struct iv_use *use;
5141 struct iv_cand *cand;
5142 bitmap to_clear = BITMAP_ALLOC (NULL);
5144 alloc_use_cost_map (data);
5146 for (i = 0; i < n_iv_uses (data); i++)
5148 use = iv_use (data, i);
5150 if (data->consider_all_candidates)
5152 for (j = 0; j < n_iv_cands (data); j++)
5154 cand = iv_cand (data, j);
5155 determine_use_iv_cost (data, use, cand);
5158 else
5160 bitmap_iterator bi;
5162 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5164 cand = iv_cand (data, j);
5165 if (!determine_use_iv_cost (data, use, cand))
5166 bitmap_set_bit (to_clear, j);
5169 /* Remove the candidates for which the cost is infinite from
5170 the list of related candidates. */
5171 bitmap_and_compl_into (use->related_cands, to_clear);
5172 bitmap_clear (to_clear);
5176 BITMAP_FREE (to_clear);
5178 if (dump_file && (dump_flags & TDF_DETAILS))
5180 fprintf (dump_file, "Use-candidate costs:\n");
5182 for (i = 0; i < n_iv_uses (data); i++)
5184 use = iv_use (data, i);
5186 fprintf (dump_file, "Use %d:\n", i);
5187 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5188 for (j = 0; j < use->n_map_members; j++)
5190 if (!use->cost_map[j].cand
5191 || infinite_cost_p (use->cost_map[j].cost))
5192 continue;
5194 fprintf (dump_file, " %d\t%d\t%d\t",
5195 use->cost_map[j].cand->id,
5196 use->cost_map[j].cost.cost,
5197 use->cost_map[j].cost.complexity);
5198 if (use->cost_map[j].depends_on)
5199 bitmap_print (dump_file,
5200 use->cost_map[j].depends_on, "","");
5201 if (use->cost_map[j].inv_expr_id != -1)
5202 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5203 fprintf (dump_file, "\n");
5206 fprintf (dump_file, "\n");
5208 fprintf (dump_file, "\n");
5212 /* Determines cost of the candidate CAND. */
5214 static void
5215 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5217 comp_cost cost_base;
5218 unsigned cost, cost_step;
5219 tree base;
5221 if (!cand->iv)
5223 cand->cost = 0;
5224 return;
5227 /* There are two costs associated with the candidate -- its increment
5228 and its initialization. The second is almost negligible for any loop
5229 that rolls enough, so we weight it only very lightly. */
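/* E.g., a setup cost of 4 in a loop expected to roll about 10 times
contributes well under one unit per iteration once adjust_setup_cost
amortizes it over the expected number of iterations, so the
per-iteration step cost usually dominates. */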
5231 base = cand->iv->base;
5232 cost_base = force_var_cost (data, base, NULL);
5233 /* It will be rare for the iv register to happen to be initialized with
5234 the proper value at no cost. In general, there will at least be a regcopy
5235 or a const set. */
5236 if (cost_base.cost == 0)
5237 cost_base.cost = COSTS_N_INSNS (1);
5238 cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
5240 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5242 /* Prefer the original ivs unless we may gain something by replacing them.
5243 The reason is to make debugging simpler; this is therefore not relevant for
5244 artificial ivs created by other optimization passes. */
5245 if (cand->pos != IP_ORIGINAL
5246 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5247 cost++;
5249 /* Prefer not to insert statements into latch unless there are some
5250 already (so that we do not create unnecessary jumps). */
5251 if (cand->pos == IP_END
5252 && empty_block_p (ip_end_pos (data->current_loop)))
5253 cost++;
5255 cand->cost = cost;
5256 cand->cost_step = cost_step;
5259 /* Determines costs of computation of the candidates. */
5261 static void
5262 determine_iv_costs (struct ivopts_data *data)
5264 unsigned i;
5266 if (dump_file && (dump_flags & TDF_DETAILS))
5268 fprintf (dump_file, "Candidate costs:\n");
5269 fprintf (dump_file, " cand\tcost\n");
5272 for (i = 0; i < n_iv_cands (data); i++)
5274 struct iv_cand *cand = iv_cand (data, i);
5276 determine_iv_cost (data, cand);
5278 if (dump_file && (dump_flags & TDF_DETAILS))
5279 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5282 if (dump_file && (dump_flags & TDF_DETAILS))
5283 fprintf (dump_file, "\n");
5286 /* Calculates cost for having SIZE induction variables. */
5288 static unsigned
5289 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5291 /* We add size to the cost, so that we prefer eliminating ivs
5292 if possible. */
5293 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5294 data->body_includes_call);
5297 /* For each size of the induction variable set determine the penalty. */
5299 static void
5300 determine_set_costs (struct ivopts_data *data)
5302 unsigned j, n;
5303 gimple phi;
5304 gimple_stmt_iterator psi;
5305 tree op;
5306 struct loop *loop = data->current_loop;
5307 bitmap_iterator bi;
5309 if (dump_file && (dump_flags & TDF_DETAILS))
5311 fprintf (dump_file, "Global costs:\n");
5312 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5313 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5314 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5315 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5318 n = 0;
5319 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5321 phi = gsi_stmt (psi);
5322 op = PHI_RESULT (phi);
5324 if (!is_gimple_reg (op))
5325 continue;
5327 if (get_iv (data, op))
5328 continue;
5330 n++;
5333 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5335 struct version_info *info = ver_info (data, j);
5337 if (info->inv_id && info->has_nonlin_use)
5338 n++;
5341 data->regs_used = n;
5342 if (dump_file && (dump_flags & TDF_DETAILS))
5343 fprintf (dump_file, " regs_used %d\n", n);
5345 if (dump_file && (dump_flags & TDF_DETAILS))
5347 fprintf (dump_file, " cost for size:\n");
5348 fprintf (dump_file, " ivs\tcost\n");
5349 for (j = 0; j <= 2 * target_avail_regs; j++)
5350 fprintf (dump_file, " %d\t%d\n", j,
5351 ivopts_global_cost_for_size (data, j));
5352 fprintf (dump_file, "\n");
5356 /* Returns true if A is a cheaper cost pair than B. */
5358 static bool
5359 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5361 int cmp;
5363 if (!a)
5364 return false;
5366 if (!b)
5367 return true;
5369 cmp = compare_costs (a->cost, b->cost);
5370 if (cmp < 0)
5371 return true;
5373 if (cmp > 0)
5374 return false;
5376 /* In case the costs are the same, prefer the cheaper candidate. */
5377 if (a->cand->cost < b->cand->cost)
5378 return true;
5380 return false;
5384 /* Returns the candidate by which USE is expressed in IVS. */
5386 static struct cost_pair *
5387 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5389 return ivs->cand_for_use[use->id];
5392 /* Computes the cost field of the IVS structure. */
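/* The total is the sum of the per-use costs, the costs of the candidates
themselves, and a global register-pressure penalty based on the number
of candidate and invariant-expression registers live in the loop. */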
5394 static void
5395 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5397 comp_cost cost = ivs->cand_use_cost;
5399 cost.cost += ivs->cand_cost;
5401 cost.cost += ivopts_global_cost_for_size (data,
5402 ivs->n_regs + ivs->num_used_inv_expr);
5404 ivs->cost = cost;
5407 /* Remove invariants in set INVS from set IVS. */
5409 static void
5410 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5412 bitmap_iterator bi;
5413 unsigned iid;
5415 if (!invs)
5416 return;
5418 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5420 ivs->n_invariant_uses[iid]--;
5421 if (ivs->n_invariant_uses[iid] == 0)
5422 ivs->n_regs--;
5426 /* Set USE not to be expressed by any candidate in IVS. */
5428 static void
5429 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5430 struct iv_use *use)
5432 unsigned uid = use->id, cid;
5433 struct cost_pair *cp;
5435 cp = ivs->cand_for_use[uid];
5436 if (!cp)
5437 return;
5438 cid = cp->cand->id;
5440 ivs->bad_uses++;
5441 ivs->cand_for_use[uid] = NULL;
5442 ivs->n_cand_uses[cid]--;
5444 if (ivs->n_cand_uses[cid] == 0)
5446 bitmap_clear_bit (ivs->cands, cid);
5447 /* Do not count the pseudocandidates. */
5448 if (cp->cand->iv)
5449 ivs->n_regs--;
5450 ivs->n_cands--;
5451 ivs->cand_cost -= cp->cand->cost;
5453 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5456 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5458 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5460 if (cp->inv_expr_id != -1)
5462 ivs->used_inv_expr[cp->inv_expr_id]--;
5463 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5464 ivs->num_used_inv_expr--;
5466 iv_ca_recount_cost (data, ivs);
5469 /* Add invariants in set INVS to set IVS. */
5471 static void
5472 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5474 bitmap_iterator bi;
5475 unsigned iid;
5477 if (!invs)
5478 return;
5480 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5482 ivs->n_invariant_uses[iid]++;
5483 if (ivs->n_invariant_uses[iid] == 1)
5484 ivs->n_regs++;
5488 /* Set cost pair for USE in set IVS to CP. */
5490 static void
5491 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5492 struct iv_use *use, struct cost_pair *cp)
5494 unsigned uid = use->id, cid;
5496 if (ivs->cand_for_use[uid] == cp)
5497 return;
5499 if (ivs->cand_for_use[uid])
5500 iv_ca_set_no_cp (data, ivs, use);
5502 if (cp)
5504 cid = cp->cand->id;
5506 ivs->bad_uses--;
5507 ivs->cand_for_use[uid] = cp;
5508 ivs->n_cand_uses[cid]++;
5509 if (ivs->n_cand_uses[cid] == 1)
5511 bitmap_set_bit (ivs->cands, cid);
5512 /* Do not count the pseudocandidates. */
5513 if (cp->cand->iv)
5514 ivs->n_regs++;
5515 ivs->n_cands++;
5516 ivs->cand_cost += cp->cand->cost;
5518 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5521 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5522 iv_ca_set_add_invariants (ivs, cp->depends_on);
5524 if (cp->inv_expr_id != -1)
5526 ivs->used_inv_expr[cp->inv_expr_id]++;
5527 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5528 ivs->num_used_inv_expr++;
5530 iv_ca_recount_cost (data, ivs);
5534 /* Extend set IVS by expressing USE by some of the candidates in it
5535 if possible. All important candidates will be considered
5536 if IMPORTANT_CANDIDATES is true. */
5538 static void
5539 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5540 struct iv_use *use, bool important_candidates)
5542 struct cost_pair *best_cp = NULL, *cp;
5543 bitmap_iterator bi;
5544 bitmap cands;
5545 unsigned i;
5547 gcc_assert (ivs->upto >= use->id);
5549 if (ivs->upto == use->id)
5551 ivs->upto++;
5552 ivs->bad_uses++;
5555 cands = (important_candidates ? data->important_candidates : ivs->cands);
5556 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5558 struct iv_cand *cand = iv_cand (data, i);
5560 cp = get_use_iv_cost (data, use, cand);
5562 if (cheaper_cost_pair (cp, best_cp))
5563 best_cp = cp;
5566 iv_ca_set_cp (data, ivs, use, best_cp);
5569 /* Get cost for assignment IVS. */
5571 static comp_cost
5572 iv_ca_cost (struct iv_ca *ivs)
5574 /* This was a conditional expression but it triggered a bug in
5575 Sun C 5.5. */
5576 if (ivs->bad_uses)
5577 return infinite_cost;
5578 else
5579 return ivs->cost;
5582 /* Returns true if all dependences of CP are among invariants in IVS. */
5584 static bool
5585 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5587 unsigned i;
5588 bitmap_iterator bi;
5590 if (!cp->depends_on)
5591 return true;
5593 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5595 if (ivs->n_invariant_uses[i] == 0)
5596 return false;
5599 return true;
5602 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and chains
5603 it before NEXT_CHANGE. */
5605 static struct iv_ca_delta *
5606 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5607 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5609 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5611 change->use = use;
5612 change->old_cp = old_cp;
5613 change->new_cp = new_cp;
5614 change->next_change = next_change;
5616 return change;
5619 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5620 are rewritten. */
5622 static struct iv_ca_delta *
5623 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5625 struct iv_ca_delta *last;
5627 if (!l2)
5628 return l1;
5630 if (!l1)
5631 return l2;
5633 for (last = l1; last->next_change; last = last->next_change)
5634 continue;
5635 last->next_change = l2;
5637 return l1;
5642 /* Reverse the list of changes DELTA, forming the inverse to it; OLD_CP and NEW_CP are also swapped in each entry, so committing the result undoes the original delta. */
5642 static struct iv_ca_delta *
5643 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5645 struct iv_ca_delta *act, *next, *prev = NULL;
5646 struct cost_pair *tmp;
5648 for (act = delta; act; act = next)
5650 next = act->next_change;
5651 act->next_change = prev;
5652 prev = act;
5654 tmp = act->old_cp;
5655 act->old_cp = act->new_cp;
5656 act->new_cp = tmp;
5659 return prev;
5662 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5663 reverted instead. */
5665 static void
5666 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5667 struct iv_ca_delta *delta, bool forward)
5669 struct cost_pair *from, *to;
5670 struct iv_ca_delta *act;
5672 if (!forward)
5673 delta = iv_ca_delta_reverse (delta);
5675 for (act = delta; act; act = act->next_change)
5677 from = act->old_cp;
5678 to = act->new_cp;
5679 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5680 iv_ca_set_cp (data, ivs, act->use, to);
5683 if (!forward)
5684 iv_ca_delta_reverse (delta);
5687 /* Returns true if CAND is used in IVS. */
5689 static bool
5690 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5692 return ivs->n_cand_uses[cand->id] > 0;
5695 /* Returns number of induction variable candidates in the set IVS. */
5697 static unsigned
5698 iv_ca_n_cands (struct iv_ca *ivs)
5700 return ivs->n_cands;
5703 /* Free the list of changes DELTA. */
5705 static void
5706 iv_ca_delta_free (struct iv_ca_delta **delta)
5708 struct iv_ca_delta *act, *next;
5710 for (act = *delta; act; act = next)
5712 next = act->next_change;
5713 free (act);
5716 *delta = NULL;
5719 /* Allocates a new iv candidate assignment. */
5721 static struct iv_ca *
5722 iv_ca_new (struct ivopts_data *data)
5724 struct iv_ca *nw = XNEW (struct iv_ca);
5726 nw->upto = 0;
5727 nw->bad_uses = 0;
5728 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5729 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5730 nw->cands = BITMAP_ALLOC (NULL);
5731 nw->n_cands = 0;
5732 nw->n_regs = 0;
5733 nw->cand_use_cost = no_cost;
5734 nw->cand_cost = 0;
5735 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5736 nw->cost = no_cost;
5737 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5738 nw->num_used_inv_expr = 0;
5740 return nw;
5743 /* Free memory occupied by the set IVS. */
5745 static void
5746 iv_ca_free (struct iv_ca **ivs)
5748 free ((*ivs)->cand_for_use);
5749 free ((*ivs)->n_cand_uses);
5750 BITMAP_FREE ((*ivs)->cands);
5751 free ((*ivs)->n_invariant_uses);
5752 free ((*ivs)->used_inv_expr);
5753 free (*ivs);
5754 *ivs = NULL;
5757 /* Dumps IVS to FILE. */
5759 static void
5760 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5762 const char *pref = " invariants ";
5763 unsigned i;
5764 comp_cost cost = iv_ca_cost (ivs);
5766 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5767 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5768 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5769 bitmap_print (file, ivs->cands, " candidates: ","\n");
5771 for (i = 0; i < ivs->upto; i++)
5773 struct iv_use *use = iv_use (data, i);
5774 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5775 if (cp)
5776 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5777 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5778 else
5779 fprintf (file, " use:%d --> ??\n", use->id);
5782 for (i = 1; i <= data->max_inv_id; i++)
5783 if (ivs->n_invariant_uses[i])
5785 fprintf (file, "%s%d", pref, i);
5786 pref = ", ";
5788 fprintf (file, "\n\n");
5791 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
5792 the new set, and store the differences in DELTA. The number of induction
5793 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5794 the function tries to find a solution with a minimal number of iv candidates. */
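/* Note that the changes are committed only transiently here, to measure
the resulting cost, and are rolled back before returning; the caller
re-applies *DELTA via iv_ca_delta_commit if the new cost turns out to
be an improvement. */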
5796 static comp_cost
5797 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5798 struct iv_cand *cand, struct iv_ca_delta **delta,
5799 unsigned *n_ivs, bool min_ncand)
5801 unsigned i;
5802 comp_cost cost;
5803 struct iv_use *use;
5804 struct cost_pair *old_cp, *new_cp;
5806 *delta = NULL;
5807 for (i = 0; i < ivs->upto; i++)
5809 use = iv_use (data, i);
5810 old_cp = iv_ca_cand_for_use (ivs, use);
5812 if (old_cp
5813 && old_cp->cand == cand)
5814 continue;
5816 new_cp = get_use_iv_cost (data, use, cand);
5817 if (!new_cp)
5818 continue;
5820 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5821 continue;
5823 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5824 continue;
5826 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5829 iv_ca_delta_commit (data, ivs, *delta, true);
5830 cost = iv_ca_cost (ivs);
5831 if (n_ivs)
5832 *n_ivs = iv_ca_n_cands (ivs);
5833 iv_ca_delta_commit (data, ivs, *delta, false);
5835 return cost;
5838 /* Try narrowing set IVS by removing CAND. Return the cost of
5839 the new set and store the differences in DELTA. */
5841 static comp_cost
5842 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5843 struct iv_cand *cand, struct iv_ca_delta **delta)
5845 unsigned i, ci;
5846 struct iv_use *use;
5847 struct cost_pair *old_cp, *new_cp, *cp;
5848 bitmap_iterator bi;
5849 struct iv_cand *cnd;
5850 comp_cost cost;
5852 *delta = NULL;
5853 for (i = 0; i < n_iv_uses (data); i++)
5855 use = iv_use (data, i);
5857 old_cp = iv_ca_cand_for_use (ivs, use);
5858 if (old_cp->cand != cand)
5859 continue;
5861 new_cp = NULL;
5863 if (data->consider_all_candidates)
5865 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5867 if (ci == cand->id)
5868 continue;
5870 cnd = iv_cand (data, ci);
5872 cp = get_use_iv_cost (data, use, cnd);
5873 if (!cp)
5874 continue;
5876 if (!iv_ca_has_deps (ivs, cp))
5877 continue;
5879 if (!cheaper_cost_pair (cp, new_cp))
5880 continue;
5882 new_cp = cp;
5885 else
5887 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5889 if (ci == cand->id)
5890 continue;
5892 cnd = iv_cand (data, ci);
5894 cp = get_use_iv_cost (data, use, cnd);
5895 if (!cp)
5896 continue;
5897 if (!iv_ca_has_deps (ivs, cp))
5898 continue;
5900 if (!cheaper_cost_pair (cp, new_cp))
5901 continue;
5903 new_cp = cp;
5907 if (!new_cp)
5909 iv_ca_delta_free (delta);
5910 return infinite_cost;
5913 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5916 iv_ca_delta_commit (data, ivs, *delta, true);
5917 cost = iv_ca_cost (ivs);
5918 iv_ca_delta_commit (data, ivs, *delta, false);
5920 return cost;
5923 /* Try optimizing the set of candidates IVS by removing candidates other
5924 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5925 differences in DELTA. */
5927 static comp_cost
5928 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5929 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5931 bitmap_iterator bi;
5932 struct iv_ca_delta *act_delta, *best_delta;
5933 unsigned i;
5934 comp_cost best_cost, acost;
5935 struct iv_cand *cand;
5937 best_delta = NULL;
5938 best_cost = iv_ca_cost (ivs);
5940 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5942 cand = iv_cand (data, i);
5944 if (cand == except_cand)
5945 continue;
5947 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5949 if (compare_costs (acost, best_cost) < 0)
5951 best_cost = acost;
5952 iv_ca_delta_free (&best_delta);
5953 best_delta = act_delta;
5955 else
5956 iv_ca_delta_free (&act_delta);
5959 if (!best_delta)
5961 *delta = NULL;
5962 return best_cost;
5965 /* Recurse to possibly remove other unnecessary ivs. */
5966 iv_ca_delta_commit (data, ivs, best_delta, true);
5967 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5968 iv_ca_delta_commit (data, ivs, best_delta, false);
5969 *delta = iv_ca_delta_join (best_delta, *delta);
5970 return best_cost;
5973 /* Tries to extend the set IVS in the best possible way in order
5974 to express USE. If ORIGINALP is true, prefer candidates from
5975 the original set of IVs, otherwise favor important candidates not
5976 based on any memory object. */
5978 static bool
5979 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5980 struct iv_use *use, bool originalp)
5982 comp_cost best_cost, act_cost;
5983 unsigned i;
5984 bitmap_iterator bi;
5985 struct iv_cand *cand;
5986 struct iv_ca_delta *best_delta = NULL, *act_delta;
5987 struct cost_pair *cp;
5989 iv_ca_add_use (data, ivs, use, false);
5990 best_cost = iv_ca_cost (ivs);
5992 cp = iv_ca_cand_for_use (ivs, use);
5993 if (!cp)
5995 ivs->upto--;
5996 ivs->bad_uses--;
5997 iv_ca_add_use (data, ivs, use, true);
5998 best_cost = iv_ca_cost (ivs);
5999 cp = iv_ca_cand_for_use (ivs, use);
6001 if (cp)
6003 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6004 iv_ca_set_no_cp (data, ivs, use);
6007 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6008 first try important candidates not based on any memory object. Only if
6009 this fails, try the specific ones. Rationale -- in loops with many
6010 variables the best choice often is to use just one generic biv. If we
6011 added here many ivs specific to the uses, the optimization algorithm later
6012 would be likely to get stuck in a local minimum, thus causing us to create
6013 too many ivs. The approach from few ivs to more seems more likely to be
6014 successful -- starting from few ivs, replacing an expensive use by a
6015 specific iv should always be a win. */
6016 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6018 cand = iv_cand (data, i);
6020 if (originalp && cand->pos != IP_ORIGINAL)
6021 continue;
6023 if (!originalp && cand->iv->base_object != NULL_TREE)
6024 continue;
6026 if (iv_ca_cand_used_p (ivs, cand))
6027 continue;
6029 cp = get_use_iv_cost (data, use, cand);
6030 if (!cp)
6031 continue;
6033 iv_ca_set_cp (data, ivs, use, cp);
6034 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6035 true);
6036 iv_ca_set_no_cp (data, ivs, use);
6037 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6039 if (compare_costs (act_cost, best_cost) < 0)
6041 best_cost = act_cost;
6043 iv_ca_delta_free (&best_delta);
6044 best_delta = act_delta;
6046 else
6047 iv_ca_delta_free (&act_delta);
6050 if (infinite_cost_p (best_cost))
6052 for (i = 0; i < use->n_map_members; i++)
6054 cp = use->cost_map + i;
6055 cand = cp->cand;
6056 if (!cand)
6057 continue;
6059 /* Already tried this. */
6060 if (cand->important)
6062 if (originalp && cand->pos == IP_ORIGINAL)
6063 continue;
6064 if (!originalp && cand->iv->base_object == NULL_TREE)
6065 continue;
6068 if (iv_ca_cand_used_p (ivs, cand))
6069 continue;
6071 act_delta = NULL;
6072 iv_ca_set_cp (data, ivs, use, cp);
6073 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6074 iv_ca_set_no_cp (data, ivs, use);
6075 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6076 cp, act_delta);
6078 if (compare_costs (act_cost, best_cost) < 0)
6080 best_cost = act_cost;
6082 if (best_delta)
6083 iv_ca_delta_free (&best_delta);
6084 best_delta = act_delta;
6086 else
6087 iv_ca_delta_free (&act_delta);
6091 iv_ca_delta_commit (data, ivs, best_delta, true);
6092 iv_ca_delta_free (&best_delta);
6094 return !infinite_cost_p (best_cost);
6097 /* Finds an initial assignment of candidates to uses. */
6099 static struct iv_ca *
6100 get_initial_solution (struct ivopts_data *data, bool originalp)
6102 struct iv_ca *ivs = iv_ca_new (data);
6103 unsigned i;
6105 for (i = 0; i < n_iv_uses (data); i++)
6106 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6108 iv_ca_free (&ivs);
6109 return NULL;
6112 return ivs;
6115 /* Tries to improve set of induction variables IVS. */
6117 static bool
6118 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6120 unsigned i, n_ivs;
6121 comp_cost acost, best_cost = iv_ca_cost (ivs);
6122 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6123 struct iv_cand *cand;
6125 /* Try extending the set of induction variables by one. */
6126 for (i = 0; i < n_iv_cands (data); i++)
6128 cand = iv_cand (data, i);
6130 if (iv_ca_cand_used_p (ivs, cand))
6131 continue;
6133 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6134 if (!act_delta)
6135 continue;
6137 /* If we successfully added the candidate and the set is small enough,
6138 try optimizing it by removing other candidates. */
6139 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6141 iv_ca_delta_commit (data, ivs, act_delta, true);
6142 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6143 iv_ca_delta_commit (data, ivs, act_delta, false);
6144 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6147 if (compare_costs (acost, best_cost) < 0)
6149 best_cost = acost;
6150 iv_ca_delta_free (&best_delta);
6151 best_delta = act_delta;
6153 else
6154 iv_ca_delta_free (&act_delta);
6157 if (!best_delta)
6159 /* Try removing the candidates from the set instead. */
6160 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6162 /* Nothing more we can do. */
6163 if (!best_delta)
6164 return false;
6167 iv_ca_delta_commit (data, ivs, best_delta, true);
6168 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6169 iv_ca_delta_free (&best_delta);
6170 return true;
6173 /* Attempts to find the optimal set of induction variables. We use a simple
6174 greedy heuristic -- we try to replace at most one candidate in the selected
6175 solution and remove the unused ivs while this improves the cost. */
6177 static struct iv_ca *
6178 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6180 struct iv_ca *set;
6182 /* Get the initial solution. */
6183 set = get_initial_solution (data, originalp);
6184 if (!set)
6186 if (dump_file && (dump_flags & TDF_DETAILS))
6187 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6188 return NULL;
6191 if (dump_file && (dump_flags & TDF_DETAILS))
6193 fprintf (dump_file, "Initial set of candidates:\n");
6194 iv_ca_dump (data, dump_file, set);
6197 while (try_improve_iv_set (data, set))
6199 if (dump_file && (dump_flags & TDF_DETAILS))
6201 fprintf (dump_file, "Improved to:\n");
6202 iv_ca_dump (data, dump_file, set);
6206 return set;
6209 static struct iv_ca *
6210 find_optimal_iv_set (struct ivopts_data *data)
6212 unsigned i;
6213 struct iv_ca *set, *origset;
6214 struct iv_use *use;
6215 comp_cost cost, origcost;
6217 /* Determine the cost based on a strategy that starts with original IVs,
6218 and try again using a strategy that prefers candidates not based
6219 on any memory object. */
6220 origset = find_optimal_iv_set_1 (data, true);
6221 set = find_optimal_iv_set_1 (data, false);
6223 if (!origset && !set)
6224 return NULL;
6226 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6227 cost = set ? iv_ca_cost (set) : infinite_cost;
6229 if (dump_file && (dump_flags & TDF_DETAILS))
6231 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6232 origcost.cost, origcost.complexity);
6233 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6234 cost.cost, cost.complexity);
6237 /* Choose the one with the best cost. */
6238 if (compare_costs (origcost, cost) <= 0)
6240 if (set)
6241 iv_ca_free (&set);
6242 set = origset;
6244 else if (origset)
6245 iv_ca_free (&origset);
6247 for (i = 0; i < n_iv_uses (data); i++)
6249 use = iv_use (data, i);
6250 use->selected = iv_ca_cand_for_use (set, use)->cand;
6253 return set;
6256 /* Creates a new induction variable corresponding to CAND. */
6258 static void
6259 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6260 {
6261 gimple_stmt_iterator incr_pos;
6262 tree base;
6263 bool after = false;
6265 if (!cand->iv)
6266 return;
6268 switch (cand->pos)
6269 {
6270 case IP_NORMAL:
6271 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6272 break;
6274 case IP_END:
6275 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6276 after = true;
6277 break;
6279 case IP_AFTER_USE:
6280 after = true;
6281 /* fall through */
6282 case IP_BEFORE_USE:
6283 incr_pos = gsi_for_stmt (cand->incremented_at);
6284 break;
6286 case IP_ORIGINAL:
6287 /* Mark that the iv is preserved. */
6288 name_info (data, cand->var_before)->preserve_biv = true;
6289 name_info (data, cand->var_after)->preserve_biv = true;
6291 /* Rewrite the increment so that it uses var_before directly. */
6292 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6293 return;
6294 }
6296 gimple_add_tmp_var (cand->var_before);
6297 add_referenced_var (cand->var_before);
6299 base = unshare_expr (cand->iv->base);
6301 create_iv (base, unshare_expr (cand->iv->step),
6302 cand->var_before, data->current_loop,
6303 &incr_pos, after, &cand->var_before, &cand->var_after);
6304 }
6306 /* Creates new induction variables described in SET. */
6308 static void
6309 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6310 {
6311 unsigned i;
6312 struct iv_cand *cand;
6313 bitmap_iterator bi;
6315 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6316 {
6317 cand = iv_cand (data, i);
6318 create_new_iv (data, cand);
6319 }
6321 if (dump_file && (dump_flags & TDF_DETAILS))
6322 {
6323 fprintf (dump_file, "\nSelected IV set: \n");
6324 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6325 {
6326 cand = iv_cand (data, i);
6327 dump_cand (dump_file, cand);
6328 }
6329 fprintf (dump_file, "\n");
6330 }
6331 }
6333 /* Rewrites USE (definition of iv used in a nonlinear expression)
6334 using candidate CAND. */
6336 static void
6337 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6338 struct iv_use *use, struct iv_cand *cand)
6339 {
6340 tree comp;
6341 tree op, tgt;
6342 gimple ass;
6343 gimple_stmt_iterator bsi;
6345 /* An important special case -- if we are asked to express the value of
6346 the original iv by itself, just exit; there is no need to
6347 introduce a new computation (that might also need casting the
6348 variable to unsigned and back). */
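/* E.g. (invented SSA names): if the original biv increment is

   i_14 = i_4 + 1;

   and we are asked to express i_14 by the IP_ORIGINAL candidate that is
   incremented at this very statement, the assignment can stay exactly as
   it is.  */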
6349 if (cand->pos == IP_ORIGINAL
6350 && cand->incremented_at == use->stmt)
6351 {
6352 tree step, ctype, utype;
6353 enum tree_code incr_code = PLUS_EXPR, old_code;
6355 gcc_assert (is_gimple_assign (use->stmt));
6356 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6358 step = cand->iv->step;
6359 ctype = TREE_TYPE (step);
6360 utype = TREE_TYPE (cand->var_after);
6361 if (TREE_CODE (step) == NEGATE_EXPR)
6362 {
6363 incr_code = MINUS_EXPR;
6364 step = TREE_OPERAND (step, 0);
6365 }
6367 /* Check whether we may leave the computation unchanged.
6368 This is the case only if it does not rely on other
6369 computations in the loop -- otherwise, the computation
6370 we rely upon may be removed in remove_unused_ivs,
6371 thus leading to an ICE. */
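/* E.g. (invented names): an increment like i_7 = i_3 + 4, whose other
   operand is a constant (or equal to the step), may stay as it is,
   whereas i_7 = i_3 + n_5 must be rewritten, since the statement
   defining n_5 might later be removed by remove_unused_ivs.  */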
6372 old_code = gimple_assign_rhs_code (use->stmt);
6373 if (old_code == PLUS_EXPR
6374 || old_code == MINUS_EXPR
6375 || old_code == POINTER_PLUS_EXPR)
6376 {
6377 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6378 op = gimple_assign_rhs2 (use->stmt);
6379 else if (old_code != MINUS_EXPR
6380 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6381 op = gimple_assign_rhs1 (use->stmt);
6382 else
6383 op = NULL_TREE;
6384 }
6385 else
6386 op = NULL_TREE;
6388 if (op
6389 && (TREE_CODE (op) == INTEGER_CST
6390 || operand_equal_p (op, step, 0)))
6391 return;
6393 /* Otherwise, add the necessary computations to express
6394 the iv. */
6395 op = fold_convert (ctype, cand->var_before);
6396 comp = fold_convert (utype,
6397 build2 (incr_code, ctype, op,
6398 unshare_expr (step)));
6399 }
6400 else
6401 {
6402 comp = get_computation (data->current_loop, use, cand);
6403 gcc_assert (comp != NULL_TREE);
6404 }
6406 switch (gimple_code (use->stmt))
6407 {
6408 case GIMPLE_PHI:
6409 tgt = PHI_RESULT (use->stmt);
6411 /* If we should keep the biv, do not replace it. */
6412 if (name_info (data, tgt)->preserve_biv)
6413 return;
6415 bsi = gsi_after_labels (gimple_bb (use->stmt));
6416 break;
6418 case GIMPLE_ASSIGN:
6419 tgt = gimple_assign_lhs (use->stmt);
6420 bsi = gsi_for_stmt (use->stmt);
6421 break;
6423 default:
6424 gcc_unreachable ();
6425 }
6427 if (!valid_gimple_rhs_p (comp)
6428 || (gimple_code (use->stmt) != GIMPLE_PHI
6429 /* We can't allow re-allocating the stmt as it might be pointed
6430 to still. */
6431 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6432 >= gimple_num_ops (gsi_stmt (bsi)))))
6433 {
6434 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6435 true, GSI_SAME_STMT);
6436 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6437 {
6438 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6439 /* As this isn't a plain copy we have to reset alignment
6440 information. */
6441 if (SSA_NAME_PTR_INFO (comp))
6442 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6443 }
6444 }
6446 if (gimple_code (use->stmt) == GIMPLE_PHI)
6447 {
6448 ass = gimple_build_assign (tgt, comp);
6449 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6451 bsi = gsi_for_stmt (use->stmt);
6452 remove_phi_node (&bsi, false);
6453 }
6454 else
6455 {
6456 gimple_assign_set_rhs_from_tree (&bsi, comp);
6457 use->stmt = gsi_stmt (bsi);
6458 }
6459 }
6461 /* Performs a peephole optimization to reorder the iv update statement with
6462 a mem ref to enable instruction combining in later phases. The mem ref uses
6463 the iv value before the update, so the reordering transformation requires
6464 adjustment of the offset. CAND is the selected IV_CAND.
6466 Example:
6468 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6469 iv2 = iv1 + 1;
6471 if (t < val) (1)
6472 goto L;
6473 goto Head;
6476 Directly propagating t over to (1) will introduce an overlapping live range,
6477 thus increasing register pressure. This peephole transforms it into:
6480 iv2 = iv1 + 1;
6481 t = MEM_REF (base, iv2, 8, 8);
6482 if (t < val)
6483 goto L;
6484 goto Head;
6485 */
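/* A note on the example above: the offset drops by one stride
   (16 - 8 = 8) because iv2 is one step ahead of iv1.  Roughly, the
   reordering only fires when the end of the block has exactly the shape

   t = MEM_REF (...);   <- use->stmt
   iv2 = iv1 + 1;       <- the update defining cand->var_after
   if (t < val) ...     <- a GIMPLE_COND

   modulo debug statements; the checks below bail out otherwise.  */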
6487 static void
6488 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6489 {
6490 tree var_after;
6491 gimple iv_update, stmt;
6492 basic_block bb;
6493 gimple_stmt_iterator gsi, gsi_iv;
6495 if (cand->pos != IP_NORMAL)
6496 return;
6498 var_after = cand->var_after;
6499 iv_update = SSA_NAME_DEF_STMT (var_after);
6501 bb = gimple_bb (iv_update);
6502 gsi = gsi_last_nondebug_bb (bb);
6503 stmt = gsi_stmt (gsi);
6505 /* Only handle conditional statements for now. */
6506 if (gimple_code (stmt) != GIMPLE_COND)
6507 return;
6509 gsi_prev_nondebug (&gsi);
6510 stmt = gsi_stmt (gsi);
6511 if (stmt != iv_update)
6512 return;
6514 gsi_prev_nondebug (&gsi);
6515 if (gsi_end_p (gsi))
6516 return;
6518 stmt = gsi_stmt (gsi);
6519 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6520 return;
6522 if (stmt != use->stmt)
6523 return;
6525 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6526 return;
6528 if (dump_file && (dump_flags & TDF_DETAILS))
6529 {
6530 fprintf (dump_file, "Reordering \n");
6531 print_gimple_stmt (dump_file, iv_update, 0, 0);
6532 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6533 fprintf (dump_file, "\n");
6534 }
6536 gsi = gsi_for_stmt (use->stmt);
6537 gsi_iv = gsi_for_stmt (iv_update);
6538 gsi_move_before (&gsi_iv, &gsi);
6540 cand->pos = IP_BEFORE_USE;
6541 cand->incremented_at = use->stmt;
6542 }
6544 /* Rewrites USE (address that is an iv) using candidate CAND. */
6546 static void
6547 rewrite_use_address (struct ivopts_data *data,
6548 struct iv_use *use, struct iv_cand *cand)
6549 {
6550 aff_tree aff;
6551 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6552 tree base_hint = NULL_TREE;
6553 tree ref, iv;
6554 bool ok;
6556 adjust_iv_update_pos (cand, use);
6557 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6558 gcc_assert (ok);
6559 unshare_aff_combination (&aff);
6561 /* To avoid undefined overflow problems, all IV candidates use unsigned
6562 integer types. The drawback is that this makes it impossible for
6563 create_mem_ref to distinguish an IV that is based on a memory object
6564 from one that represents simply an offset.
6566 To work around this problem, we pass a hint to create_mem_ref that
6567 indicates which variable (if any) in aff is an IV based on a memory
6568 object. Note that we only consider the candidate. If this is not
6569 based on an object, the base of the reference is in some subexpression
6570 of the use -- but these will use pointer types, so they are recognized
6571 by the create_mem_ref heuristics anyway. */
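/* A hypothetical illustration: for a candidate ivtmp_17 based on the
   array `a', the affine combination may mention ivtmp_17 only as an
   unsigned value, e.g. (sizetype) ivtmp_17 + 16, so nothing in it looks
   like a pointer anymore.  The hint tells create_mem_ref that ivtmp_17
   carries the address of the underlying object and is therefore a good
   base for the resulting memory reference.  */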
6572 if (cand->iv->base_object)
6573 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6575 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6576 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6577 reference_alias_ptr_type (*use->op_p),
6578 iv, base_hint, data->speed);
6579 copy_ref_info (ref, *use->op_p);
6580 *use->op_p = ref;
6581 }
6583 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6584 candidate CAND. */
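/* An illustrative rewrite (invented SSA names): when a bound is available
   for the candidate, an exit test such as

   if (i_4 < 100)

   can be replaced by

   if (ivtmp_9 != bound_11)

   with bound_11 computed once on the loop preheader edge; otherwise the
   original iv is simply expressed in terms of the candidate and
   substituted into the comparison.  */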
6586 static void
6587 rewrite_use_compare (struct ivopts_data *data,
6588 struct iv_use *use, struct iv_cand *cand)
6589 {
6590 tree comp, *var_p, op, bound;
6591 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6592 enum tree_code compare;
6593 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6594 bool ok;
6596 bound = cp->value;
6597 if (bound)
6598 {
6599 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6600 tree var_type = TREE_TYPE (var);
6601 gimple_seq stmts;
6603 if (dump_file && (dump_flags & TDF_DETAILS))
6604 {
6605 fprintf (dump_file, "Replacing exit test: ");
6606 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6607 }
6608 compare = cp->comp;
6609 bound = unshare_expr (fold_convert (var_type, bound));
6610 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6611 if (stmts)
6612 gsi_insert_seq_on_edge_immediate (
6613 loop_preheader_edge (data->current_loop),
6614 stmts);
6616 gimple_cond_set_lhs (use->stmt, var);
6617 gimple_cond_set_code (use->stmt, compare);
6618 gimple_cond_set_rhs (use->stmt, op);
6619 return;
6620 }
6622 /* The induction variable elimination failed; just express the original
6623 giv. */
6624 comp = get_computation (data->current_loop, use, cand);
6625 gcc_assert (comp != NULL_TREE);
6627 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6628 gcc_assert (ok);
6630 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6631 true, GSI_SAME_STMT);
6632 }
6634 /* Rewrites USE using candidate CAND. */
6636 static void
6637 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6638 {
6639 switch (use->type)
6640 {
6641 case USE_NONLINEAR_EXPR:
6642 rewrite_use_nonlinear_expr (data, use, cand);
6643 break;
6645 case USE_ADDRESS:
6646 rewrite_use_address (data, use, cand);
6647 break;
6649 case USE_COMPARE:
6650 rewrite_use_compare (data, use, cand);
6651 break;
6653 default:
6654 gcc_unreachable ();
6655 }
6657 update_stmt (use->stmt);
6658 }
6660 /* Rewrite the uses using the selected induction variables. */
6662 static void
6663 rewrite_uses (struct ivopts_data *data)
6664 {
6665 unsigned i;
6666 struct iv_cand *cand;
6667 struct iv_use *use;
6669 for (i = 0; i < n_iv_uses (data); i++)
6670 {
6671 use = iv_use (data, i);
6672 cand = use->selected;
6673 gcc_assert (cand);
6675 rewrite_use (data, use, cand);
6676 }
6677 }
6679 /* Removes the ivs that are not used after rewriting. */
6681 static void
6682 remove_unused_ivs (struct ivopts_data *data)
6683 {
6684 unsigned j;
6685 bitmap_iterator bi;
6686 bitmap toremove = BITMAP_ALLOC (NULL);
6688 /* Figure out an order in which to release SSA DEFs so that we don't
6689 release something that we'd have to propagate into a debug stmt
6690 afterwards. */
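/* E.g. (invented SSA names): if unused ivs satisfy i_8 = i_5 + 4 and a
   debug statement still refers to i_8, then i_8 has to be released (and
   its value propagated into the debug statement) before i_5 is.
   Collecting everything into one bitmap lets release_defs_bitset choose
   such an order.  */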
6691 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6692 {
6693 struct version_info *info;
6695 info = ver_info (data, j);
6696 if (info->iv
6697 && !integer_zerop (info->iv->step)
6698 && !info->inv_id
6699 && !info->iv->have_use_for
6700 && !info->preserve_biv)
6701 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6702 }
6704 release_defs_bitset (toremove);
6706 BITMAP_FREE (toremove);
6707 }
6709 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6710 for pointer_map_traverse. */
6712 static bool
6713 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6714 void *data ATTRIBUTE_UNUSED)
6715 {
6716 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6718 free (niter);
6719 return true;
6720 }
6722 /* Frees data allocated by the optimization of a single loop. */
6724 static void
6725 free_loop_data (struct ivopts_data *data)
6726 {
6727 unsigned i, j;
6728 bitmap_iterator bi;
6729 tree obj;
6731 if (data->niters)
6732 {
6733 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6734 pointer_map_destroy (data->niters);
6735 data->niters = NULL;
6736 }
6738 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6739 {
6740 struct version_info *info;
6742 info = ver_info (data, i);
6743 free (info->iv);
6744 info->iv = NULL;
6745 info->has_nonlin_use = false;
6746 info->preserve_biv = false;
6747 info->inv_id = 0;
6748 }
6749 bitmap_clear (data->relevant);
6750 bitmap_clear (data->important_candidates);
6752 for (i = 0; i < n_iv_uses (data); i++)
6753 {
6754 struct iv_use *use = iv_use (data, i);
6756 free (use->iv);
6757 BITMAP_FREE (use->related_cands);
6758 for (j = 0; j < use->n_map_members; j++)
6759 if (use->cost_map[j].depends_on)
6760 BITMAP_FREE (use->cost_map[j].depends_on);
6761 free (use->cost_map);
6762 free (use);
6763 }
6764 VEC_truncate (iv_use_p, data->iv_uses, 0);
6766 for (i = 0; i < n_iv_cands (data); i++)
6767 {
6768 struct iv_cand *cand = iv_cand (data, i);
6770 free (cand->iv);
6771 if (cand->depends_on)
6772 BITMAP_FREE (cand->depends_on);
6773 free (cand);
6774 }
6775 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6777 if (data->version_info_size < num_ssa_names)
6778 {
6779 data->version_info_size = 2 * num_ssa_names;
6780 free (data->version_info);
6781 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6782 }
6784 data->max_inv_id = 0;
6786 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6787 SET_DECL_RTL (obj, NULL_RTX);
6789 VEC_truncate (tree, decl_rtl_to_reset, 0);
6791 htab_empty (data->inv_expr_tab);
6792 data->inv_expr_id = 0;
6793 }
6795 /* Finalizes data structures used by the iv optimization pass. */
6798 static void
6799 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6800 {
6801 free_loop_data (data);
6802 free (data->version_info);
6803 BITMAP_FREE (data->relevant);
6804 BITMAP_FREE (data->important_candidates);
6806 VEC_free (tree, heap, decl_rtl_to_reset);
6807 VEC_free (iv_use_p, heap, data->iv_uses);
6808 VEC_free (iv_cand_p, heap, data->iv_candidates);
6809 htab_delete (data->inv_expr_tab);
6811 finalize_costs ();
6812 }
6814 /* Returns true if the loop body BODY includes any function calls. */
6816 static bool
6817 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6818 {
6819 gimple_stmt_iterator gsi;
6820 unsigned i;
6822 for (i = 0; i < num_nodes; i++)
6823 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6824 {
6825 gimple stmt = gsi_stmt (gsi);
6826 if (is_gimple_call (stmt)
6827 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6828 return true;
6829 }
6830 return false;
6831 }
6833 /* Optimizes the LOOP. Returns true if anything changed. */
6835 static bool
6836 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6837 {
6838 bool changed = false;
6839 struct iv_ca *iv_ca;
6840 edge exit = single_dom_exit (loop);
6841 basic_block *body;
6843 gcc_assert (!data->niters);
6844 data->current_loop = loop;
6845 data->speed = optimize_loop_for_speed_p (loop);
6847 if (dump_file && (dump_flags & TDF_DETAILS))
6848 {
6849 fprintf (dump_file, "Processing loop %d\n", loop->num);
6851 if (exit)
6852 {
6853 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6854 exit->src->index, exit->dest->index);
6855 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6856 fprintf (dump_file, "\n");
6857 }
6859 fprintf (dump_file, "\n");
6860 }
6862 body = get_loop_body (loop);
6863 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6864 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6865 free (body);
6867 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6869 /* For each ssa name, determine whether it behaves as an induction variable
6870 in some loop. */
6871 if (!find_induction_variables (data))
6872 goto finish;
6874 /* Finds interesting uses (item 1). */
6875 find_interesting_uses (data);
6876 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6877 goto finish;
6879 /* Finds candidates for the induction variables (item 2). */
6880 find_iv_candidates (data);
6882 /* Calculates the costs (item 3, part 1). */
6883 determine_iv_costs (data);
6884 determine_use_iv_costs (data);
6885 determine_set_costs (data);
6887 /* Find the optimal set of induction variables (item 3, part 2). */
6888 iv_ca = find_optimal_iv_set (data);
6889 if (!iv_ca)
6890 goto finish;
6891 changed = true;
6893 /* Create the new induction variables (item 4, part 1). */
6894 create_new_ivs (data, iv_ca);
6895 iv_ca_free (&iv_ca);
6897 /* Rewrite the uses (item 4, part 2). */
6898 rewrite_uses (data);
6900 /* Remove the ivs that are unused after rewriting. */
6901 remove_unused_ivs (data);
6903 /* We have changed the structure of induction variables; it might happen
6904 that definitions in the scev database refer to some of them that were
6905 eliminated. */
6906 scev_reset ();
6908 finish:
6909 free_loop_data (data);
6911 return changed;
6912 }
6914 /* Main entry point. Optimizes induction variables in loops. */
6916 void
6917 tree_ssa_iv_optimize (void)
6918 {
6919 struct loop *loop;
6920 struct ivopts_data data;
6921 loop_iterator li;
6923 tree_ssa_iv_optimize_init (&data);
6925 /* Optimize the loops starting with the innermost ones. */
6926 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6927 {
6928 if (dump_file && (dump_flags & TDF_DETAILS))
6929 flow_loop_dump (loop, dump_file, NULL, 1);
6931 tree_ssa_iv_optimize_loop (&data, loop);
6932 }
6934 tree_ssa_iv_optimize_finalize (&data);
6935 }