gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2014 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "stor-layout.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "pointer-set.h"
  74 #include "hash-table.h"
  75 #include "tree-ssa-alias.h"
  76 #include "internal-fn.h"
  77 #include "tree-eh.h"
  78 #include "gimple-expr.h"
  79 #include "is-a.h"
  80 #include "gimple.h"
  81 #include "gimplify.h"
  82 #include "gimple-iterator.h"
  83 #include "gimplify-me.h"
  84 #include "gimple-ssa.h"
  85 #include "cgraph.h"
  86 #include "tree-cfg.h"
  87 #include "tree-phinodes.h"
  88 #include "ssa-iterators.h"
  89 #include "stringpool.h"
  90 #include "tree-ssanames.h"
  91 #include "tree-ssa-loop-ivopts.h"
  92 #include "tree-ssa-loop-manip.h"
  93 #include "tree-ssa-loop-niter.h"
  94 #include "tree-ssa-loop.h"
  95 #include "expr.h"
  96 #include "tree-dfa.h"
  97 #include "tree-ssa.h"
  98 #include "cfgloop.h"
  99 #include "tree-pass.h"
 100 #include "insn-config.h"
 101 #include "tree-chrec.h"
 102 #include "tree-scalar-evolution.h"
 103 #include "cfgloop.h"
 104 #include "params.h"
 105 #include "langhooks.h"
 106 #include "tree-affine.h"
 107 #include "target.h"
 108 #include "tree-inline.h"
 109 #include "tree-ssa-propagate.h"
 110 #include "expmed.h"
 111 #include "tree-ssa-address.h"
 112 #include "builtins.h"
 113
 114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 115    cost of different addressing modes.  This should be moved to a TBD
 116    interface between the GIMPLE and RTL worlds.  */
 117 #include "expr.h"
 118 #include "recog.h"
 119
 120 /* The infinite cost.  */
 121 #define INFTY 10000000
 122
 123 #define AVG_LOOP_NITER(LOOP) 5
 124
 125 /* Returns the expected number of loop iterations for LOOP.
 126    The average trip count is computed from profile data if it
 127    exists. */
 128
 129 static inline HOST_WIDE_INT
 130 avg_loop_niter (struct loop *loop)
 131 {
 132   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 133   if (niter == -1)
 134     return AVG_LOOP_NITER (loop);
 135
 136   return niter;
 137 }
 138
 139 /* Representation of the induction variable.  */
 140 struct iv
 141 {
 142   tree base;            /* Initial value of the iv.  */
 143   tree base_object;     /* A memory object to that the induction variable points.  */
 144   tree step;            /* Step of the iv (constant only).  */
 145   tree ssa_name;        /* The ssa name with the value.  */
 146   bool biv_p;           /* Is it a biv?  */
 147   bool have_use_for;    /* Do we already have a use for it?  */
 148   unsigned use_id;      /* The identifier in the use if it is the case.  */
 149 };
 150
 151 /* Per-ssa version information (induction variable descriptions, etc.).  */
 152 struct version_info
 153 {
 154   tree name;            /* The ssa name.  */
 155   struct iv *iv;        /* Induction variable description.  */
 156   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 157                            an expression that is not an induction variable.  */
 158   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 159   unsigned inv_id;      /* Id of an invariant.  */
 160 };
 161
 162 /* Types of uses.  */
 163 enum use_type
 164 {
 165   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 166   USE_ADDRESS,          /* Use in an address.  */
 167   USE_COMPARE           /* Use is a compare.  */
 168 };
 169
 170 /* Cost of a computation.  */
 171 typedef struct
 172 {
 173   int cost;             /* The runtime cost.  */
 174   unsigned complexity;  /* The estimate of the complexity of the code for
 175                            the computation (in no concrete units --
 176                            complexity field should be larger for more
 177                            complex expressions and addressing modes).  */
 178 } comp_cost;
 179
 180 static const comp_cost no_cost = {0, 0};
 181 static const comp_cost infinite_cost = {INFTY, INFTY};
 182
 183 /* The candidate - cost pair.  */
 184 struct cost_pair
 185 {
 186   struct iv_cand *cand; /* The candidate.  */
 187   comp_cost cost;       /* The cost.  */
 188   bitmap depends_on;    /* The list of invariants that have to be
 189                            preserved.  */
 190   tree value;           /* For final value elimination, the expression for
 191                            the final value of the iv.  For iv elimination,
 192                            the new bound to compare with.  */
 193   enum tree_code comp;  /* For iv elimination, the comparison.  */
 194   int inv_expr_id;      /* Loop invariant expression id.  */
 195 };
 196
 197 /* Use.  */
 198 struct iv_use
 199 {
 200   unsigned id;          /* The id of the use.  */
 201   enum use_type type;   /* Type of the use.  */
 202   struct iv *iv;        /* The induction variable it is based on.  */
 203   gimple stmt;          /* Statement in that it occurs.  */
 204   tree *op_p;           /* The place where it occurs.  */
 205   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 206                            important ones.  */
 207
 208   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 209   struct cost_pair *cost_map;
 210                         /* The costs wrto the iv candidates.  */
 211
 212   struct iv_cand *selected;
 213                         /* The selected candidate.  */
 214 };
 215
 216 /* The position where the iv is computed.  */
 217 enum iv_position
 218 {
 219   IP_NORMAL,            /* At the end, just before the exit condition.  */
 220   IP_END,               /* At the end of the latch block.  */
 221   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 222   IP_AFTER_USE,         /* Immediately after a specific use.  */
 223   IP_ORIGINAL           /* The original biv.  */
 224 };
 225
 226 /* The induction variable candidate.  */
 227 struct iv_cand
 228 {
 229   unsigned id;          /* The number of the candidate.  */
 230   bool important;       /* Whether this is an "important" candidate, i.e. such
 231                            that it should be considered by all uses.  */
 232   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 233   gimple incremented_at;/* For original biv, the statement where it is
 234                            incremented.  */
 235   tree var_before;      /* The variable used for it before increment.  */
 236   tree var_after;       /* The variable used for it after increment.  */
 237   struct iv *iv;        /* The value of the candidate.  NULL for
 238                            "pseudocandidate" used to indicate the possibility
 239                            to replace the final value of an iv by direct
 240                            computation of the value.  */
 241   unsigned cost;        /* Cost of the candidate.  */
 242   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 243   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 244                               where it is incremented.  */
 245   bitmap depends_on;    /* The list of invariants that are used in step of the
 246                            biv.  */
 247 };
 248
 249 /* Loop invariant expression hashtable entry.  */
 250 struct iv_inv_expr_ent
 251 {
 252   tree expr;
 253   int id;
 254   hashval_t hash;
 255 };
 256
 257 /* The data used by the induction variable optimizations.  */
 258
 259 typedef struct iv_use *iv_use_p;
 260
 261 typedef struct iv_cand *iv_cand_p;
 262
 263 /* Hashtable helpers.  */
 264
 265 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 266 {
 267   typedef iv_inv_expr_ent value_type;
 268   typedef iv_inv_expr_ent compare_type;
 269   static inline hashval_t hash (const value_type *);
 270   static inline bool equal (const value_type *, const compare_type *);
 271 };
 272
 273 /* Hash function for loop invariant expressions.  */
 274
 275 inline hashval_t
 276 iv_inv_expr_hasher::hash (const value_type *expr)
 277 {
 278   return expr->hash;
 279 }
 280
 281 /* Hash table equality function for expressions.  */
 282
 283 inline bool
 284 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
 285 {
 286   return expr1->hash == expr2->hash
 287          && operand_equal_p (expr1->expr, expr2->expr, 0);
 288 }
 289
 290 struct ivopts_data
 291 {
 292   /* The currently optimized loop.  */
 293   struct loop *current_loop;
 294
 295   /* Numbers of iterations for all exits of the current loop.  */
 296   struct pointer_map_t *niters;
 297
 298   /* Number of registers used in it.  */
 299   unsigned regs_used;
 300
 301   /* The size of version_info array allocated.  */
 302   unsigned version_info_size;
 303
 304   /* The array of information for the ssa names.  */
 305   struct version_info *version_info;
 306
 307   /* The hashtable of loop invariant expressions created
 308      by ivopt.  */
 309   hash_table <iv_inv_expr_hasher> inv_expr_tab;
 310
 311   /* Loop invariant expression id.  */
 312   int inv_expr_id;
 313
 314   /* The bitmap of indices in version_info whose value was changed.  */
 315   bitmap relevant;
 316
 317   /* The uses of induction variables.  */
 318   vec<iv_use_p> iv_uses;
 319
 320   /* The candidates.  */
 321   vec<iv_cand_p> iv_candidates;
 322
 323   /* A bitmap of important candidates.  */
 324   bitmap important_candidates;
 325
 326   /* The maximum invariant id.  */
 327   unsigned max_inv_id;
 328
 329   /* Whether to consider just related and important candidates when replacing a
 330      use.  */
 331   bool consider_all_candidates;
 332
 333   /* Are we optimizing for speed?  */
 334   bool speed;
 335
 336   /* Whether the loop body includes any function calls.  */
 337   bool body_includes_call;
 338
 339   /* Whether the loop body can only be exited via single exit.  */
 340   bool loop_single_exit_p;
 341 };
 342
 343 /* An assignment of iv candidates to uses.  */
 344
 345 struct iv_ca
 346 {
 347   /* The number of uses covered by the assignment.  */
 348   unsigned upto;
 349
 350   /* Number of uses that cannot be expressed by the candidates in the set.  */
 351   unsigned bad_uses;
 352
 353   /* Candidate assigned to a use, together with the related costs.  */
 354   struct cost_pair **cand_for_use;
 355
 356   /* Number of times each candidate is used.  */
 357   unsigned *n_cand_uses;
 358
 359   /* The candidates used.  */
 360   bitmap cands;
 361
 362   /* The number of candidates in the set.  */
 363   unsigned n_cands;
 364
 365   /* Total number of registers needed.  */
 366   unsigned n_regs;
 367
 368   /* Total cost of expressing uses.  */
 369   comp_cost cand_use_cost;
 370
 371   /* Total cost of candidates.  */
 372   unsigned cand_cost;
 373
 374   /* Number of times each invariant is used.  */
 375   unsigned *n_invariant_uses;
 376
 377   /* The array holding the number of uses of each loop
 378      invariant expressions created by ivopt.  */
 379   unsigned *used_inv_expr;
 380
 381   /* The number of created loop invariants.  */
 382   unsigned num_used_inv_expr;
 383
 384   /* Total cost of the assignment.  */
 385   comp_cost cost;
 386 };
 387
 388 /* Difference of two iv candidate assignments.  */
 389
 390 struct iv_ca_delta
 391 {
 392   /* Changed use.  */
 393   struct iv_use *use;
 394
 395   /* An old assignment (for rollback purposes).  */
 396   struct cost_pair *old_cp;
 397
 398   /* A new assignment.  */
 399   struct cost_pair *new_cp;
 400
 401   /* Next change in the list.  */
 402   struct iv_ca_delta *next_change;
 403 };
 404
 405 /* Bound on number of candidates below that all candidates are considered.  */
 406
 407 #define CONSIDER_ALL_CANDIDATES_BOUND \
 408   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 409
 410 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 411    optimizing such a loop would help, and it would take ages).  */
 412
 413 #define MAX_CONSIDERED_USES \
 414   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 415
 416 /* If there are at most this number of ivs in the set, try removing unnecessary
 417    ivs from the set always.  */
 418
 419 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 420   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 421
 422 /* The list of trees for that the decl_rtl field must be reset is stored
 423    here.  */
 424
 425 static vec<tree> decl_rtl_to_reset;
 426
 427 static comp_cost force_expr_to_var_cost (tree, bool);
 428
 429 /* Number of uses recorded in DATA.  */
 430
 431 static inline unsigned
 432 n_iv_uses (struct ivopts_data *data)
 433 {
 434   return data->iv_uses.length ();
 435 }
 436
 437 /* Ith use recorded in DATA.  */
 438
 439 static inline struct iv_use *
 440 iv_use (struct ivopts_data *data, unsigned i)
 441 {
 442   return data->iv_uses[i];
 443 }
 444
 445 /* Number of candidates recorded in DATA.  */
 446
 447 static inline unsigned
 448 n_iv_cands (struct ivopts_data *data)
 449 {
 450   return data->iv_candidates.length ();
 451 }
 452
 453 /* Ith candidate recorded in DATA.  */
 454
 455 static inline struct iv_cand *
 456 iv_cand (struct ivopts_data *data, unsigned i)
 457 {
 458   return data->iv_candidates[i];
 459 }
 460
 461 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 462
 463 edge
 464 single_dom_exit (struct loop *loop)
 465 {
 466   edge exit = single_exit (loop);
 467
 468   if (!exit)
 469     return NULL;
 470
 471   if (!just_once_each_iteration_p (loop, exit->src))
 472     return NULL;
 473
 474   return exit;
 475 }
 476
 477 /* Dumps information about the induction variable IV to FILE.  */
 478
 479 void
 480 dump_iv (FILE *file, struct iv *iv)
 481 {
 482   if (iv->ssa_name)
 483     {
 484       fprintf (file, "ssa name ");
 485       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 486       fprintf (file, "\n");
 487     }
 488
 489   fprintf (file, "  type ");
 490   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 491   fprintf (file, "\n");
 492
 493   if (iv->step)
 494     {
 495       fprintf (file, "  base ");
 496       print_generic_expr (file, iv->base, TDF_SLIM);
 497       fprintf (file, "\n");
 498
 499       fprintf (file, "  step ");
 500       print_generic_expr (file, iv->step, TDF_SLIM);
 501       fprintf (file, "\n");
 502     }
 503   else
 504     {
 505       fprintf (file, "  invariant ");
 506       print_generic_expr (file, iv->base, TDF_SLIM);
 507       fprintf (file, "\n");
 508     }
 509
 510   if (iv->base_object)
 511     {
 512       fprintf (file, "  base object ");
 513       print_generic_expr (file, iv->base_object, TDF_SLIM);
 514       fprintf (file, "\n");
 515     }
 516
 517   if (iv->biv_p)
 518     fprintf (file, "  is a biv\n");
 519 }
 520
 521 /* Dumps information about the USE to FILE.  */
 522
 523 void
 524 dump_use (FILE *file, struct iv_use *use)
 525 {
 526   fprintf (file, "use %d\n", use->id);
 527
 528   switch (use->type)
 529     {
 530     case USE_NONLINEAR_EXPR:
 531       fprintf (file, "  generic\n");
 532       break;
 533
 534     case USE_ADDRESS:
 535       fprintf (file, "  address\n");
 536       break;
 537
 538     case USE_COMPARE:
 539       fprintf (file, "  compare\n");
 540       break;
 541
 542     default:
 543       gcc_unreachable ();
 544     }
 545
 546   fprintf (file, "  in statement ");
 547   print_gimple_stmt (file, use->stmt, 0, 0);
 548   fprintf (file, "\n");
 549
 550   fprintf (file, "  at position ");
 551   if (use->op_p)
 552     print_generic_expr (file, *use->op_p, TDF_SLIM);
 553   fprintf (file, "\n");
 554
 555   dump_iv (file, use->iv);
 556
 557   if (use->related_cands)
 558     {
 559       fprintf (file, "  related candidates ");
 560       dump_bitmap (file, use->related_cands);
 561     }
 562 }
 563
 564 /* Dumps information about the uses to FILE.  */
 565
 566 void
 567 dump_uses (FILE *file, struct ivopts_data *data)
 568 {
 569   unsigned i;
 570   struct iv_use *use;
 571
 572   for (i = 0; i < n_iv_uses (data); i++)
 573     {
 574       use = iv_use (data, i);
 575
 576       dump_use (file, use);
 577       fprintf (file, "\n");
 578     }
 579 }
 580
 581 /* Dumps information about induction variable candidate CAND to FILE.  */
 582
 583 void
 584 dump_cand (FILE *file, struct iv_cand *cand)
 585 {
 586   struct iv *iv = cand->iv;
 587
 588   fprintf (file, "candidate %d%s\n",
 589            cand->id, cand->important ? " (important)" : "");
 590
 591   if (cand->depends_on)
 592     {
 593       fprintf (file, "  depends on ");
 594       dump_bitmap (file, cand->depends_on);
 595     }
 596
 597   if (!iv)
 598     {
 599       fprintf (file, "  final value replacement\n");
 600       return;
 601     }
 602
 603   if (cand->var_before)
 604     {
 605       fprintf (file, "  var_before ");
 606       print_generic_expr (file, cand->var_before, TDF_SLIM);
 607       fprintf (file, "\n");
 608     }
 609   if (cand->var_after)
 610     {
 611       fprintf (file, "  var_after ");
 612       print_generic_expr (file, cand->var_after, TDF_SLIM);
 613       fprintf (file, "\n");
 614     }
 615
 616   switch (cand->pos)
 617     {
 618     case IP_NORMAL:
 619       fprintf (file, "  incremented before exit test\n");
 620       break;
 621
 622     case IP_BEFORE_USE:
 623       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 624       break;
 625
 626     case IP_AFTER_USE:
 627       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 628       break;
 629
 630     case IP_END:
 631       fprintf (file, "  incremented at end\n");
 632       break;
 633
 634     case IP_ORIGINAL:
 635       fprintf (file, "  original biv\n");
 636       break;
 637     }
 638
 639   dump_iv (file, iv);
 640 }
 641
 642 /* Returns the info for ssa version VER.  */
 643
 644 static inline struct version_info *
 645 ver_info (struct ivopts_data *data, unsigned ver)
 646 {
 647   return data->version_info + ver;
 648 }
 649
 650 /* Returns the info for ssa name NAME.  */
 651
 652 static inline struct version_info *
 653 name_info (struct ivopts_data *data, tree name)
 654 {
 655   return ver_info (data, SSA_NAME_VERSION (name));
 656 }
 657
 658 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 659    emitted in LOOP.  */
 660
 661 static bool
 662 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 663 {
 664   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 665
 666   gcc_assert (bb);
 667
 668   if (sbb == loop->latch)
 669     return true;
 670
 671   if (sbb != bb)
 672     return false;
 673
 674   return stmt == last_stmt (bb);
 675 }
 676
 677 /* Returns true if STMT if after the place where the original induction
 678    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 679    if the positions are identical.  */
 680
 681 static bool
 682 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 683 {
 684   basic_block cand_bb = gimple_bb (cand->incremented_at);
 685   basic_block stmt_bb = gimple_bb (stmt);
 686
 687   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 688     return false;
 689
 690   if (stmt_bb != cand_bb)
 691     return true;
 692
 693   if (true_if_equal
 694       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 695     return true;
 696   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 697 }
 698
 699 /* Returns true if STMT if after the place where the induction variable
 700    CAND is incremented in LOOP.  */
 701
 702 static bool
 703 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 704 {
 705   switch (cand->pos)
 706     {
 707     case IP_END:
 708       return false;
 709
 710     case IP_NORMAL:
 711       return stmt_after_ip_normal_pos (loop, stmt);
 712
 713     case IP_ORIGINAL:
 714     case IP_AFTER_USE:
 715       return stmt_after_inc_pos (cand, stmt, false);
 716
 717     case IP_BEFORE_USE:
 718       return stmt_after_inc_pos (cand, stmt, true);
 719
 720     default:
 721       gcc_unreachable ();
 722     }
 723 }
 724
 725 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 726
 727 static bool
 728 abnormal_ssa_name_p (tree exp)
 729 {
 730   if (!exp)
 731     return false;
 732
 733   if (TREE_CODE (exp) != SSA_NAME)
 734     return false;
 735
 736   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 737 }
 738
 739 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 740    abnormal phi node.  Callback for for_each_index.  */
 741
 742 static bool
 743 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 744                                   void *data ATTRIBUTE_UNUSED)
 745 {
 746   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 747     {
 748       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 749         return false;
 750       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 751         return false;
 752     }
 753
 754   return !abnormal_ssa_name_p (*index);
 755 }
 756
 757 /* Returns true if EXPR contains a ssa name that occurs in an
 758    abnormal phi node.  */
 759
 760 bool
 761 contains_abnormal_ssa_name_p (tree expr)
 762 {
 763   enum tree_code code;
 764   enum tree_code_class codeclass;
 765
 766   if (!expr)
 767     return false;
 768
 769   code = TREE_CODE (expr);
 770   codeclass = TREE_CODE_CLASS (code);
 771
 772   if (code == SSA_NAME)
 773     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 774
 775   if (code == INTEGER_CST
 776       || is_gimple_min_invariant (expr))
 777     return false;
 778
 779   if (code == ADDR_EXPR)
 780     return !for_each_index (&TREE_OPERAND (expr, 0),
 781                             idx_contains_abnormal_ssa_name_p,
 782                             NULL);
 783
 784   if (code == COND_EXPR)
 785     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 786       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 787       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 788
 789   switch (codeclass)
 790     {
 791     case tcc_binary:
 792     case tcc_comparison:
 793       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 794         return true;
 795
 796       /* Fallthru.  */
 797     case tcc_unary:
 798       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 799         return true;
 800
 801       break;
 802
 803     default:
 804       gcc_unreachable ();
 805     }
 806
 807   return false;
 808 }
 809
 810 /*  Returns the structure describing number of iterations determined from
 811     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 812
 813 static struct tree_niter_desc *
 814 niter_for_exit (struct ivopts_data *data, edge exit)
 815 {
 816   struct tree_niter_desc *desc;
 817   void **slot;
 818
 819   if (!data->niters)
 820     {
 821       data->niters = pointer_map_create ();
 822       slot = NULL;
 823     }
 824   else
 825     slot = pointer_map_contains (data->niters, exit);
 826
 827   if (!slot)
 828     {
 829       /* Try to determine number of iterations.  We cannot safely work with ssa
 830          names that appear in phi nodes on abnormal edges, so that we do not
 831          create overlapping life ranges for them (PR 27283).  */
 832       desc = XNEW (struct tree_niter_desc);
 833       if (!number_of_iterations_exit (data->current_loop,
 834                                       exit, desc, true)
 835           || contains_abnormal_ssa_name_p (desc->niter))
 836         {
 837           XDELETE (desc);
 838           desc = NULL;
 839         }
 840       slot = pointer_map_insert (data->niters, exit);
 841       *slot = desc;
 842     }
 843   else
 844     desc = (struct tree_niter_desc *) *slot;
 845
 846   return desc;
 847 }
 848
 849 /* Returns the structure describing number of iterations determined from
 850    single dominating exit of DATA->current_loop, or NULL if something
 851    goes wrong.  */
 852
 853 static struct tree_niter_desc *
 854 niter_for_single_dom_exit (struct ivopts_data *data)
 855 {
 856   edge exit = single_dom_exit (data->current_loop);
 857
 858   if (!exit)
 859     return NULL;
 860
 861   return niter_for_exit (data, exit);
 862 }
 863
 864 /* Initializes data structures used by the iv optimization pass, stored
 865    in DATA.  */
 866
 867 static void
 868 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 869 {
 870   data->version_info_size = 2 * num_ssa_names;
 871   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 872   data->relevant = BITMAP_ALLOC (NULL);
 873   data->important_candidates = BITMAP_ALLOC (NULL);
 874   data->max_inv_id = 0;
 875   data->niters = NULL;
 876   data->iv_uses.create (20);
 877   data->iv_candidates.create (20);
 878   data->inv_expr_tab.create (10);
 879   data->inv_expr_id = 0;
 880   decl_rtl_to_reset.create (20);
 881 }
 882
 883 /* Returns a memory object to that EXPR points.  In case we are able to
 884    determine that it does not point to any such object, NULL is returned.  */
 885
 886 static tree
 887 determine_base_object (tree expr)
 888 {
 889   enum tree_code code = TREE_CODE (expr);
 890   tree base, obj;
 891
 892   /* If this is a pointer casted to any type, we need to determine
 893      the base object for the pointer; so handle conversions before
 894      throwing away non-pointer expressions.  */
 895   if (CONVERT_EXPR_P (expr))
 896     return determine_base_object (TREE_OPERAND (expr, 0));
 897
 898   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 899     return NULL_TREE;
 900
 901   switch (code)
 902     {
 903     case INTEGER_CST:
 904       return NULL_TREE;
 905
 906     case ADDR_EXPR:
 907       obj = TREE_OPERAND (expr, 0);
 908       base = get_base_address (obj);
 909
 910       if (!base)
 911         return expr;
 912
 913       if (TREE_CODE (base) == MEM_REF)
 914         return determine_base_object (TREE_OPERAND (base, 0));
 915
 916       return fold_convert (ptr_type_node,
 917                            build_fold_addr_expr (base));
 918
 919     case POINTER_PLUS_EXPR:
 920       return determine_base_object (TREE_OPERAND (expr, 0));
 921
 922     case PLUS_EXPR:
 923     case MINUS_EXPR:
 924       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 925       gcc_unreachable ();
 926
 927     default:
 928       return fold_convert (ptr_type_node, expr);
 929     }
 930 }
 931
 932 /* Return true if address expression with non-DECL_P operand appears
 933    in EXPR.  */
 934
 935 static bool
 936 contain_complex_addr_expr (tree expr)
 937 {
 938   bool res = false;
 939
 940   STRIP_NOPS (expr);
 941   switch (TREE_CODE (expr))
 942     {
 943     case POINTER_PLUS_EXPR:
 944     case PLUS_EXPR:
 945     case MINUS_EXPR:
 946       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 947       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 948       break;
 949
 950     case ADDR_EXPR:
 951       return (!DECL_P (TREE_OPERAND (expr, 0)));
 952
 953     default:
 954       return false;
 955     }
 956
 957   return res;
 958 }
 959
 960 /* Allocates an induction variable with given initial value BASE and step STEP
 961    for loop LOOP.  */
 962
 963 static struct iv *
 964 alloc_iv (tree base, tree step)
 965 {
 966   tree expr = base;
 967   struct iv *iv = XCNEW (struct iv);
 968   gcc_assert (step != NULL_TREE);
 969
 970   /* Lower address expression in base except ones with DECL_P as operand.
 971      By doing this:
 972        1) More accurate cost can be computed for address expressions;
 973        2) Duplicate candidates won't be created for bases in different
 974           forms, like &a[0] and &a.  */
 975   STRIP_NOPS (expr);
 976   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
 977       || contain_complex_addr_expr (expr))
 978     {
 979       aff_tree comb;
 980       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
 981       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
 982     }
 983
 984   iv->base = base;
 985   iv->base_object = determine_base_object (base);
 986   iv->step = step;
 987   iv->biv_p = false;
 988   iv->have_use_for = false;
 989   iv->use_id = 0;
 990   iv->ssa_name = NULL_TREE;
 991
 992   return iv;
 993 }
 994
 995 /* Sets STEP and BASE for induction variable IV.  */
 996
 997 static void
 998 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 999 {
1000   struct version_info *info = name_info (data, iv);
1001
1002   gcc_assert (!info->iv);
1003
1004   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1005   info->iv = alloc_iv (base, step);
1006   info->iv->ssa_name = iv;
1007 }
1008
1009 /* Finds induction variable declaration for VAR.  */
1010
1011 static struct iv *
1012 get_iv (struct ivopts_data *data, tree var)
1013 {
1014   basic_block bb;
1015   tree type = TREE_TYPE (var);
1016
1017   if (!POINTER_TYPE_P (type)
1018       && !INTEGRAL_TYPE_P (type))
1019     return NULL;
1020
1021   if (!name_info (data, var)->iv)
1022     {
1023       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1024
1025       if (!bb
1026           || !flow_bb_inside_loop_p (data->current_loop, bb))
1027         set_iv (data, var, var, build_int_cst (type, 0));
1028     }
1029
1030   return name_info (data, var)->iv;
1031 }
1032
1033 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
1034    not define a simple affine biv with nonzero step.  */
1035
1036 static tree
1037 determine_biv_step (gimple phi)
1038 {
1039   struct loop *loop = gimple_bb (phi)->loop_father;
1040   tree name = PHI_RESULT (phi);
1041   affine_iv iv;
1042
1043   if (virtual_operand_p (name))
1044     return NULL_TREE;
1045
1046   if (!simple_iv (loop, loop, name, &iv, true))
1047     return NULL_TREE;
1048
1049   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1050 }
1051
1052 /* Finds basic ivs.  */
1053
1054 static bool
1055 find_bivs (struct ivopts_data *data)
1056 {
1057   gimple phi;
1058   tree step, type, base;
1059   bool found = false;
1060   struct loop *loop = data->current_loop;
1061   gimple_stmt_iterator psi;
1062
1063   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1064     {
1065       phi = gsi_stmt (psi);
1066
1067       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1068         continue;
1069
1070       step = determine_biv_step (phi);
1071       if (!step)
1072         continue;
1073
1074       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1075       base = expand_simple_operations (base);
1076       if (contains_abnormal_ssa_name_p (base)
1077           || contains_abnormal_ssa_name_p (step))
1078         continue;
1079
1080       type = TREE_TYPE (PHI_RESULT (phi));
1081       base = fold_convert (type, base);
1082       if (step)
1083         {
1084           if (POINTER_TYPE_P (type))
1085             step = convert_to_ptrofftype (step);
1086           else
1087             step = fold_convert (type, step);
1088         }
1089
1090       set_iv (data, PHI_RESULT (phi), base, step);
1091       found = true;
1092     }
1093
1094   return found;
1095 }
1096
1097 /* Marks basic ivs.  */
1098
1099 static void
1100 mark_bivs (struct ivopts_data *data)
1101 {
1102   gimple phi, def;
1103   tree var;
1104   struct iv *iv, *incr_iv;
1105   struct loop *loop = data->current_loop;
1106   basic_block incr_bb;
1107   gimple_stmt_iterator psi;
1108
1109   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1110     {
1111       phi = gsi_stmt (psi);
1112
1113       iv = get_iv (data, PHI_RESULT (phi));
1114       if (!iv)
1115         continue;
1116
1117       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1118       def = SSA_NAME_DEF_STMT (var);
1119       /* Don't mark iv peeled from other one as biv.  */
1120       if (def
1121           && gimple_code (def) == GIMPLE_PHI
1122           && gimple_bb (def) == loop->header)
1123         continue;
1124
1125       incr_iv = get_iv (data, var);
1126       if (!incr_iv)
1127         continue;
1128
1129       /* If the increment is in the subloop, ignore it.  */
1130       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1131       if (incr_bb->loop_father != data->current_loop
1132           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1133         continue;
1134
1135       iv->biv_p = true;
1136       incr_iv->biv_p = true;
1137     }
1138 }
1139
1140 /* Checks whether STMT defines a linear induction variable and stores its
1141    parameters to IV.  */
1142
1143 static bool
1144 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1145 {
1146   tree lhs;
1147   struct loop *loop = data->current_loop;
1148
1149   iv->base = NULL_TREE;
1150   iv->step = NULL_TREE;
1151
1152   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1153     return false;
1154
1155   lhs = gimple_assign_lhs (stmt);
1156   if (TREE_CODE (lhs) != SSA_NAME)
1157     return false;
1158
1159   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1160     return false;
1161   iv->base = expand_simple_operations (iv->base);
1162
1163   if (contains_abnormal_ssa_name_p (iv->base)
1164       || contains_abnormal_ssa_name_p (iv->step))
1165     return false;
1166
1167   /* If STMT could throw, then do not consider STMT as defining a GIV.
1168      While this will suppress optimizations, we can not safely delete this
1169      GIV and associated statements, even if it appears it is not used.  */
1170   if (stmt_could_throw_p (stmt))
1171     return false;
1172
1173   return true;
1174 }
1175
1176 /* Finds general ivs in statement STMT.  */
1177
1178 static void
1179 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1180 {
1181   affine_iv iv;
1182
1183   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1184     return;
1185
1186   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1187 }
1188
1189 /* Finds general ivs in basic block BB.  */
1190
1191 static void
1192 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1193 {
1194   gimple_stmt_iterator bsi;
1195
1196   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1197     find_givs_in_stmt (data, gsi_stmt (bsi));
1198 }
1199
1200 /* Finds general ivs.  */
1201
1202 static void
1203 find_givs (struct ivopts_data *data)
1204 {
1205   struct loop *loop = data->current_loop;
1206   basic_block *body = get_loop_body_in_dom_order (loop);
1207   unsigned i;
1208
1209   for (i = 0; i < loop->num_nodes; i++)
1210     find_givs_in_bb (data, body[i]);
1211   free (body);
1212 }
1213
1214 /* For each ssa name defined in LOOP determines whether it is an induction
1215    variable and if so, its initial value and step.  */
1216
1217 static bool
1218 find_induction_variables (struct ivopts_data *data)
1219 {
1220   unsigned i;
1221   bitmap_iterator bi;
1222
1223   if (!find_bivs (data))
1224     return false;
1225
1226   find_givs (data);
1227   mark_bivs (data);
1228
1229   if (dump_file && (dump_flags & TDF_DETAILS))
1230     {
1231       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1232
1233       if (niter)
1234         {
1235           fprintf (dump_file, "  number of iterations ");
1236           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1237           if (!integer_zerop (niter->may_be_zero))
1238             {
1239               fprintf (dump_file, "; zero if ");
1240               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1241             }
1242           fprintf (dump_file, "\n\n");
1243         };
1244
1245       fprintf (dump_file, "Induction variables:\n\n");
1246
1247       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1248         {
1249           if (ver_info (data, i)->iv)
1250             dump_iv (dump_file, ver_info (data, i)->iv);
1251         }
1252     }
1253
1254   return true;
1255 }
1256
1257 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1258
1259 static struct iv_use *
1260 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1261             gimple stmt, enum use_type use_type)
1262 {
1263   struct iv_use *use = XCNEW (struct iv_use);
1264
1265   use->id = n_iv_uses (data);
1266   use->type = use_type;
1267   use->iv = iv;
1268   use->stmt = stmt;
1269   use->op_p = use_p;
1270   use->related_cands = BITMAP_ALLOC (NULL);
1271
1272   /* To avoid showing ssa name in the dumps, if it was not reset by the
1273      caller.  */
1274   iv->ssa_name = NULL_TREE;
1275
1276   if (dump_file && (dump_flags & TDF_DETAILS))
1277     dump_use (dump_file, use);
1278
1279   data->iv_uses.safe_push (use);
1280
1281   return use;
1282 }
1283
1284 /* Checks whether OP is a loop-level invariant and if so, records it.
1285    NONLINEAR_USE is true if the invariant is used in a way we do not
1286    handle specially.  */
1287
1288 static void
1289 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1290 {
1291   basic_block bb;
1292   struct version_info *info;
1293
1294   if (TREE_CODE (op) != SSA_NAME
1295       || virtual_operand_p (op))
1296     return;
1297
1298   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1299   if (bb
1300       && flow_bb_inside_loop_p (data->current_loop, bb))
1301     return;
1302
1303   info = name_info (data, op);
1304   info->name = op;
1305   info->has_nonlin_use |= nonlinear_use;
1306   if (!info->inv_id)
1307     info->inv_id = ++data->max_inv_id;
1308   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1309 }
1310
1311 /* Checks whether the use OP is interesting and if so, records it.  */
1312
1313 static struct iv_use *
1314 find_interesting_uses_op (struct ivopts_data *data, tree op)
1315 {
1316   struct iv *iv;
1317   struct iv *civ;
1318   gimple stmt;
1319   struct iv_use *use;
1320
1321   if (TREE_CODE (op) != SSA_NAME)
1322     return NULL;
1323
1324   iv = get_iv (data, op);
1325   if (!iv)
1326     return NULL;
1327
1328   if (iv->have_use_for)
1329     {
1330       use = iv_use (data, iv->use_id);
1331
1332       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1333       return use;
1334     }
1335
1336   if (integer_zerop (iv->step))
1337     {
1338       record_invariant (data, op, true);
1339       return NULL;
1340     }
1341   iv->have_use_for = true;
1342
1343   civ = XNEW (struct iv);
1344   *civ = *iv;
1345
1346   stmt = SSA_NAME_DEF_STMT (op);
1347   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1348               || is_gimple_assign (stmt));
1349
1350   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1351   iv->use_id = use->id;
1352
1353   return use;
1354 }
1355
1356 /* Given a condition in statement STMT, checks whether it is a compare
1357    of an induction variable and an invariant.  If this is the case,
1358    CONTROL_VAR is set to location of the iv, BOUND to the location of
1359    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1360    induction variable descriptions, and true is returned.  If this is not
1361    the case, CONTROL_VAR and BOUND are set to the arguments of the
1362    condition and false is returned.  */
1363
1364 static bool
1365 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1366                        tree **control_var, tree **bound,
1367                        struct iv **iv_var, struct iv **iv_bound)
1368 {
1369   /* The objects returned when COND has constant operands.  */
1370   static struct iv const_iv;
1371   static tree zero;
1372   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1373   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1374   bool ret = false;
1375
1376   if (gimple_code (stmt) == GIMPLE_COND)
1377     {
1378       op0 = gimple_cond_lhs_ptr (stmt);
1379       op1 = gimple_cond_rhs_ptr (stmt);
1380     }
1381   else
1382     {
1383       op0 = gimple_assign_rhs1_ptr (stmt);
1384       op1 = gimple_assign_rhs2_ptr (stmt);
1385     }
1386
1387   zero = integer_zero_node;
1388   const_iv.step = integer_zero_node;
1389
1390   if (TREE_CODE (*op0) == SSA_NAME)
1391     iv0 = get_iv (data, *op0);
1392   if (TREE_CODE (*op1) == SSA_NAME)
1393     iv1 = get_iv (data, *op1);
1394
1395   /* Exactly one of the compared values must be an iv, and the other one must
1396      be an invariant.  */
1397   if (!iv0 || !iv1)
1398     goto end;
1399
1400   if (integer_zerop (iv0->step))
1401     {
1402       /* Control variable may be on the other side.  */
1403       tmp_op = op0; op0 = op1; op1 = tmp_op;
1404       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1405     }
1406   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1407
1408 end:
1409   if (control_var)
1410     *control_var = op0;;
1411   if (iv_var)
1412     *iv_var = iv0;;
1413   if (bound)
1414     *bound = op1;
1415   if (iv_bound)
1416     *iv_bound = iv1;
1417
1418   return ret;
1419 }
1420
1421 /* Checks whether the condition in STMT is interesting and if so,
1422    records it.  */
1423
1424 static void
1425 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1426 {
1427   tree *var_p, *bound_p;
1428   struct iv *var_iv, *civ;
1429
1430   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1431     {
1432       find_interesting_uses_op (data, *var_p);
1433       find_interesting_uses_op (data, *bound_p);
1434       return;
1435     }
1436
1437   civ = XNEW (struct iv);
1438   *civ = *var_iv;
1439   record_use (data, NULL, civ, stmt, USE_COMPARE);
1440 }
1441
1442 /* Returns the outermost loop EXPR is obviously invariant in
1443    relative to the loop LOOP, i.e. if all its operands are defined
1444    outside of the returned loop.  Returns NULL if EXPR is not
1445    even obviously invariant in LOOP.  */
1446
1447 struct loop *
1448 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1449 {
1450   basic_block def_bb;
1451   unsigned i, len;
1452
1453   if (is_gimple_min_invariant (expr))
1454     return current_loops->tree_root;
1455
1456   if (TREE_CODE (expr) == SSA_NAME)
1457     {
1458       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1459       if (def_bb)
1460         {
1461           if (flow_bb_inside_loop_p (loop, def_bb))
1462             return NULL;
1463           return superloop_at_depth (loop,
1464                                      loop_depth (def_bb->loop_father) + 1);
1465         }
1466
1467       return current_loops->tree_root;
1468     }
1469
1470   if (!EXPR_P (expr))
1471     return NULL;
1472
1473   unsigned maxdepth = 0;
1474   len = TREE_OPERAND_LENGTH (expr);
1475   for (i = 0; i < len; i++)
1476     {
1477       struct loop *ivloop;
1478       if (!TREE_OPERAND (expr, i))
1479         continue;
1480
1481       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1482       if (!ivloop)
1483         return NULL;
1484       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1485     }
1486
1487   return superloop_at_depth (loop, maxdepth);
1488 }
1489
1490 /* Returns true if expression EXPR is obviously invariant in LOOP,
1491    i.e. if all its operands are defined outside of the LOOP.  LOOP
1492    should not be the function body.  */
1493
1494 bool
1495 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1496 {
1497   basic_block def_bb;
1498   unsigned i, len;
1499
1500   gcc_assert (loop_depth (loop) > 0);
1501
1502   if (is_gimple_min_invariant (expr))
1503     return true;
1504
1505   if (TREE_CODE (expr) == SSA_NAME)
1506     {
1507       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1508       if (def_bb
1509           && flow_bb_inside_loop_p (loop, def_bb))
1510         return false;
1511
1512       return true;
1513     }
1514
1515   if (!EXPR_P (expr))
1516     return false;
1517
1518   len = TREE_OPERAND_LENGTH (expr);
1519   for (i = 0; i < len; i++)
1520     if (TREE_OPERAND (expr, i)
1521         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1522       return false;
1523
1524   return true;
1525 }
1526
1527 /* Cumulates the steps of indices into DATA and replaces their values with the
1528    initial ones.  Returns false when the value of the index cannot be determined.
1529    Callback for for_each_index.  */
1530
1531 struct ifs_ivopts_data
1532 {
1533   struct ivopts_data *ivopts_data;
1534   gimple stmt;
1535   tree step;
1536 };
1537
1538 static bool
1539 idx_find_step (tree base, tree *idx, void *data)
1540 {
1541   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1542   struct iv *iv;
1543   tree step, iv_base, iv_step, lbound, off;
1544   struct loop *loop = dta->ivopts_data->current_loop;
1545
1546   /* If base is a component ref, require that the offset of the reference
1547      be invariant.  */
1548   if (TREE_CODE (base) == COMPONENT_REF)
1549     {
1550       off = component_ref_field_offset (base);
1551       return expr_invariant_in_loop_p (loop, off);
1552     }
1553
1554   /* If base is array, first check whether we will be able to move the
1555      reference out of the loop (in order to take its address in strength
1556      reduction).  In order for this to work we need both lower bound
1557      and step to be loop invariants.  */
1558   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1559     {
1560       /* Moreover, for a range, the size needs to be invariant as well.  */
1561       if (TREE_CODE (base) == ARRAY_RANGE_REF
1562           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1563         return false;
1564
1565       step = array_ref_element_size (base);
1566       lbound = array_ref_low_bound (base);
1567
1568       if (!expr_invariant_in_loop_p (loop, step)
1569           || !expr_invariant_in_loop_p (loop, lbound))
1570         return false;
1571     }
1572
1573   if (TREE_CODE (*idx) != SSA_NAME)
1574     return true;
1575
1576   iv = get_iv (dta->ivopts_data, *idx);
1577   if (!iv)
1578     return false;
1579
1580   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1581           *&x[0], which is not folded and does not trigger the
1582           ARRAY_REF path below.  */
1583   *idx = iv->base;
1584
1585   if (integer_zerop (iv->step))
1586     return true;
1587
1588   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1589     {
1590       step = array_ref_element_size (base);
1591
1592       /* We only handle addresses whose step is an integer constant.  */
1593       if (TREE_CODE (step) != INTEGER_CST)
1594         return false;
1595     }
1596   else
1597     /* The step for pointer arithmetics already is 1 byte.  */
1598     step = size_one_node;
1599
1600   iv_base = iv->base;
1601   iv_step = iv->step;
1602   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1603                             sizetype, &iv_base, &iv_step, dta->stmt,
1604                             false))
1605     {
1606       /* The index might wrap.  */
1607       return false;
1608     }
1609
1610   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1611   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1612
1613   return true;
1614 }
1615
1616 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1617    object is passed to it in DATA.  */
1618
1619 static bool
1620 idx_record_use (tree base, tree *idx,
1621                 void *vdata)
1622 {
1623   struct ivopts_data *data = (struct ivopts_data *) vdata;
1624   find_interesting_uses_op (data, *idx);
1625   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1626     {
1627       find_interesting_uses_op (data, array_ref_element_size (base));
1628       find_interesting_uses_op (data, array_ref_low_bound (base));
1629     }
1630   return true;
1631 }
1632
1633 /* If we can prove that TOP = cst * BOT for some constant cst,
1634    store cst to MUL and return true.  Otherwise return false.
1635    The returned value is always sign-extended, regardless of the
1636    signedness of TOP and BOT.  */
1637
1638 static bool
1639 constant_multiple_of (tree top, tree bot, widest_int *mul)
1640 {
1641   tree mby;
1642   enum tree_code code;
1643   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1644   widest_int res, p0, p1;
1645
1646   STRIP_NOPS (top);
1647   STRIP_NOPS (bot);
1648
1649   if (operand_equal_p (top, bot, 0))
1650     {
1651       *mul = 1;
1652       return true;
1653     }
1654
1655   code = TREE_CODE (top);
1656   switch (code)
1657     {
1658     case MULT_EXPR:
1659       mby = TREE_OPERAND (top, 1);
1660       if (TREE_CODE (mby) != INTEGER_CST)
1661         return false;
1662
1663       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1664         return false;
1665
1666       *mul = wi::sext (res * wi::to_widest (mby), precision);
1667       return true;
1668
1669     case PLUS_EXPR:
1670     case MINUS_EXPR:
1671       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1672           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1673         return false;
1674
1675       if (code == MINUS_EXPR)
1676         p1 = -p1;
1677       *mul = wi::sext (p0 + p1, precision);
1678       return true;
1679
1680     case INTEGER_CST:
1681       if (TREE_CODE (bot) != INTEGER_CST)
1682         return false;
1683
1684       p0 = widest_int::from (top, SIGNED);
1685       p1 = widest_int::from (bot, SIGNED);
1686       if (p1 == 0)
1687         return false;
1688       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1689       return res == 0;
1690
1691     default:
1692       return false;
1693     }
1694 }
1695
1696 /* Return true if memory reference REF with step STEP may be unaligned.  */
1697
1698 static bool
1699 may_be_unaligned_p (tree ref, tree step)
1700 {
1701   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1702      thus they are not misaligned.  */
1703   if (TREE_CODE (ref) == TARGET_MEM_REF)
1704     return false;
1705
1706   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1707
1708   unsigned HOST_WIDE_INT bitpos;
1709   unsigned int ref_align;
1710   get_object_alignment_1 (ref, &ref_align, &bitpos);
1711   if (ref_align < align
1712       || (bitpos % align) != 0
1713       || (bitpos % BITS_PER_UNIT) != 0)
1714     return true;
1715
1716   unsigned int trailing_zeros = tree_ctz (step);
1717   if (trailing_zeros < HOST_BITS_PER_INT
1718       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1719     return true;
1720
1721   return false;
1722 }
1723
1724 /* Return true if EXPR may be non-addressable.   */
1725
1726 bool
1727 may_be_nonaddressable_p (tree expr)
1728 {
1729   switch (TREE_CODE (expr))
1730     {
1731     case TARGET_MEM_REF:
1732       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1733          target, thus they are always addressable.  */
1734       return false;
1735
1736     case COMPONENT_REF:
1737       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1738              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1739
1740     case VIEW_CONVERT_EXPR:
1741       /* This kind of view-conversions may wrap non-addressable objects
1742          and make them look addressable.  After some processing the
1743          non-addressability may be uncovered again, causing ADDR_EXPRs
1744          of inappropriate objects to be built.  */
1745       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1746           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1747         return true;
1748
1749       /* ... fall through ... */
1750
1751     case ARRAY_REF:
1752     case ARRAY_RANGE_REF:
1753       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1754
1755     CASE_CONVERT:
1756       return true;
1757
1758     default:
1759       break;
1760     }
1761
1762   return false;
1763 }
1764
1765 /* Finds addresses in *OP_P inside STMT.  */
1766
1767 static void
1768 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1769 {
1770   tree base = *op_p, step = size_zero_node;
1771   struct iv *civ;
1772   struct ifs_ivopts_data ifs_ivopts_data;
1773
1774   /* Do not play with volatile memory references.  A bit too conservative,
1775      perhaps, but safe.  */
1776   if (gimple_has_volatile_ops (stmt))
1777     goto fail;
1778
1779   /* Ignore bitfields for now.  Not really something terribly complicated
1780      to handle.  TODO.  */
1781   if (TREE_CODE (base) == BIT_FIELD_REF)
1782     goto fail;
1783
1784   base = unshare_expr (base);
1785
1786   if (TREE_CODE (base) == TARGET_MEM_REF)
1787     {
1788       tree type = build_pointer_type (TREE_TYPE (base));
1789       tree astep;
1790
1791       if (TMR_BASE (base)
1792           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1793         {
1794           civ = get_iv (data, TMR_BASE (base));
1795           if (!civ)
1796             goto fail;
1797
1798           TMR_BASE (base) = civ->base;
1799           step = civ->step;
1800         }
1801       if (TMR_INDEX2 (base)
1802           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1803         {
1804           civ = get_iv (data, TMR_INDEX2 (base));
1805           if (!civ)
1806             goto fail;
1807
1808           TMR_INDEX2 (base) = civ->base;
1809           step = civ->step;
1810         }
1811       if (TMR_INDEX (base)
1812           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1813         {
1814           civ = get_iv (data, TMR_INDEX (base));
1815           if (!civ)
1816             goto fail;
1817
1818           TMR_INDEX (base) = civ->base;
1819           astep = civ->step;
1820
1821           if (astep)
1822             {
1823               if (TMR_STEP (base))
1824                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1825
1826               step = fold_build2 (PLUS_EXPR, type, step, astep);
1827             }
1828         }
1829
1830       if (integer_zerop (step))
1831         goto fail;
1832       base = tree_mem_ref_addr (type, base);
1833     }
1834   else
1835     {
1836       ifs_ivopts_data.ivopts_data = data;
1837       ifs_ivopts_data.stmt = stmt;
1838       ifs_ivopts_data.step = size_zero_node;
1839       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1840           || integer_zerop (ifs_ivopts_data.step))
1841         goto fail;
1842       step = ifs_ivopts_data.step;
1843
1844       /* Check that the base expression is addressable.  This needs
1845          to be done after substituting bases of IVs into it.  */
1846       if (may_be_nonaddressable_p (base))
1847         goto fail;
1848
1849       /* Moreover, on strict alignment platforms, check that it is
1850          sufficiently aligned.  */
1851       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1852         goto fail;
1853
1854       base = build_fold_addr_expr (base);
1855
1856       /* Substituting bases of IVs into the base expression might
1857          have caused folding opportunities.  */
1858       if (TREE_CODE (base) == ADDR_EXPR)
1859         {
1860           tree *ref = &TREE_OPERAND (base, 0);
1861           while (handled_component_p (*ref))
1862             ref = &TREE_OPERAND (*ref, 0);
1863           if (TREE_CODE (*ref) == MEM_REF)
1864             {
1865               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1866                                       TREE_OPERAND (*ref, 0),
1867                                       TREE_OPERAND (*ref, 1));
1868               if (tem)
1869                 *ref = tem;
1870             }
1871         }
1872     }
1873
1874   civ = alloc_iv (base, step);
1875   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1876   return;
1877
1878 fail:
1879   for_each_index (op_p, idx_record_use, data);
1880 }
1881
1882 /* Finds and records invariants used in STMT.  */
1883
1884 static void
1885 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1886 {
1887   ssa_op_iter iter;
1888   use_operand_p use_p;
1889   tree op;
1890
1891   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1892     {
1893       op = USE_FROM_PTR (use_p);
1894       record_invariant (data, op, false);
1895     }
1896 }
1897
1898 /* Finds interesting uses of induction variables in the statement STMT.  */
1899
1900 static void
1901 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1902 {
1903   struct iv *iv;
1904   tree op, *lhs, *rhs;
1905   ssa_op_iter iter;
1906   use_operand_p use_p;
1907   enum tree_code code;
1908
1909   find_invariants_stmt (data, stmt);
1910
1911   if (gimple_code (stmt) == GIMPLE_COND)
1912     {
1913       find_interesting_uses_cond (data, stmt);
1914       return;
1915     }
1916
1917   if (is_gimple_assign (stmt))
1918     {
1919       lhs = gimple_assign_lhs_ptr (stmt);
1920       rhs = gimple_assign_rhs1_ptr (stmt);
1921
1922       if (TREE_CODE (*lhs) == SSA_NAME)
1923         {
1924           /* If the statement defines an induction variable, the uses are not
1925              interesting by themselves.  */
1926
1927           iv = get_iv (data, *lhs);
1928
1929           if (iv && !integer_zerop (iv->step))
1930             return;
1931         }
1932
1933       code = gimple_assign_rhs_code (stmt);
1934       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1935           && (REFERENCE_CLASS_P (*rhs)
1936               || is_gimple_val (*rhs)))
1937         {
1938           if (REFERENCE_CLASS_P (*rhs))
1939             find_interesting_uses_address (data, stmt, rhs);
1940           else
1941             find_interesting_uses_op (data, *rhs);
1942
1943           if (REFERENCE_CLASS_P (*lhs))
1944             find_interesting_uses_address (data, stmt, lhs);
1945           return;
1946         }
1947       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1948         {
1949           find_interesting_uses_cond (data, stmt);
1950           return;
1951         }
1952
1953       /* TODO -- we should also handle address uses of type
1954
1955          memory = call (whatever);
1956
1957          and
1958
1959          call (memory).  */
1960     }
1961
1962   if (gimple_code (stmt) == GIMPLE_PHI
1963       && gimple_bb (stmt) == data->current_loop->header)
1964     {
1965       iv = get_iv (data, PHI_RESULT (stmt));
1966
1967       if (iv && !integer_zerop (iv->step))
1968         return;
1969     }
1970
1971   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1972     {
1973       op = USE_FROM_PTR (use_p);
1974
1975       if (TREE_CODE (op) != SSA_NAME)
1976         continue;
1977
1978       iv = get_iv (data, op);
1979       if (!iv)
1980         continue;
1981
1982       find_interesting_uses_op (data, op);
1983     }
1984 }
1985
1986 /* Finds interesting uses of induction variables outside of loops
1987    on loop exit edge EXIT.  */
1988
1989 static void
1990 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1991 {
1992   gimple phi;
1993   gimple_stmt_iterator psi;
1994   tree def;
1995
1996   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1997     {
1998       phi = gsi_stmt (psi);
1999       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2000       if (!virtual_operand_p (def))
2001         find_interesting_uses_op (data, def);
2002     }
2003 }
2004
2005 /* Finds uses of the induction variables that are interesting.  */
2006
2007 static void
2008 find_interesting_uses (struct ivopts_data *data)
2009 {
2010   basic_block bb;
2011   gimple_stmt_iterator bsi;
2012   basic_block *body = get_loop_body (data->current_loop);
2013   unsigned i;
2014   struct version_info *info;
2015   edge e;
2016
2017   if (dump_file && (dump_flags & TDF_DETAILS))
2018     fprintf (dump_file, "Uses:\n\n");
2019
2020   for (i = 0; i < data->current_loop->num_nodes; i++)
2021     {
2022       edge_iterator ei;
2023       bb = body[i];
2024
2025       FOR_EACH_EDGE (e, ei, bb->succs)
2026         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2027             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2028           find_interesting_uses_outside (data, e);
2029
2030       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2031         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2032       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2033         if (!is_gimple_debug (gsi_stmt (bsi)))
2034           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2035     }
2036
2037   if (dump_file && (dump_flags & TDF_DETAILS))
2038     {
2039       bitmap_iterator bi;
2040
2041       fprintf (dump_file, "\n");
2042
2043       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2044         {
2045           info = ver_info (data, i);
2046           if (info->inv_id)
2047             {
2048               fprintf (dump_file, "  ");
2049               print_generic_expr (dump_file, info->name, TDF_SLIM);
2050               fprintf (dump_file, " is invariant (%d)%s\n",
2051                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2052             }
2053         }
2054
2055       fprintf (dump_file, "\n");
2056     }
2057
2058   free (body);
2059 }
2060
2061 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2062    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2063    we are at the top-level of the processed address.  */
2064
2065 static tree
2066 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2067                 HOST_WIDE_INT *offset)
2068 {
2069   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2070   enum tree_code code;
2071   tree type, orig_type = TREE_TYPE (expr);
2072   HOST_WIDE_INT off0, off1, st;
2073   tree orig_expr = expr;
2074
2075   STRIP_NOPS (expr);
2076
2077   type = TREE_TYPE (expr);
2078   code = TREE_CODE (expr);
2079   *offset = 0;
2080
2081   switch (code)
2082     {
2083     case INTEGER_CST:
2084       if (!cst_and_fits_in_hwi (expr)
2085           || integer_zerop (expr))
2086         return orig_expr;
2087
2088       *offset = int_cst_value (expr);
2089       return build_int_cst (orig_type, 0);
2090
2091     case POINTER_PLUS_EXPR:
2092     case PLUS_EXPR:
2093     case MINUS_EXPR:
2094       op0 = TREE_OPERAND (expr, 0);
2095       op1 = TREE_OPERAND (expr, 1);
2096
2097       op0 = strip_offset_1 (op0, false, false, &off0);
2098       op1 = strip_offset_1 (op1, false, false, &off1);
2099
2100       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2101       if (op0 == TREE_OPERAND (expr, 0)
2102           && op1 == TREE_OPERAND (expr, 1))
2103         return orig_expr;
2104
2105       if (integer_zerop (op1))
2106         expr = op0;
2107       else if (integer_zerop (op0))
2108         {
2109           if (code == MINUS_EXPR)
2110             expr = fold_build1 (NEGATE_EXPR, type, op1);
2111           else
2112             expr = op1;
2113         }
2114       else
2115         expr = fold_build2 (code, type, op0, op1);
2116
2117       return fold_convert (orig_type, expr);
2118
2119     case MULT_EXPR:
2120       op1 = TREE_OPERAND (expr, 1);
2121       if (!cst_and_fits_in_hwi (op1))
2122         return orig_expr;
2123
2124       op0 = TREE_OPERAND (expr, 0);
2125       op0 = strip_offset_1 (op0, false, false, &off0);
2126       if (op0 == TREE_OPERAND (expr, 0))
2127         return orig_expr;
2128
2129       *offset = off0 * int_cst_value (op1);
2130       if (integer_zerop (op0))
2131         expr = op0;
2132       else
2133         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2134
2135       return fold_convert (orig_type, expr);
2136
2137     case ARRAY_REF:
2138     case ARRAY_RANGE_REF:
2139       if (!inside_addr)
2140         return orig_expr;
2141
2142       step = array_ref_element_size (expr);
2143       if (!cst_and_fits_in_hwi (step))
2144         break;
2145
2146       st = int_cst_value (step);
2147       op1 = TREE_OPERAND (expr, 1);
2148       op1 = strip_offset_1 (op1, false, false, &off1);
2149       *offset = off1 * st;
2150
2151       if (top_compref
2152           && integer_zerop (op1))
2153         {
2154           /* Strip the component reference completely.  */
2155           op0 = TREE_OPERAND (expr, 0);
2156           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2157           *offset += off0;
2158           return op0;
2159         }
2160       break;
2161
2162     case COMPONENT_REF:
2163       {
2164         tree field;
2165
2166         if (!inside_addr)
2167           return orig_expr;
2168
2169         tmp = component_ref_field_offset (expr);
2170         field = TREE_OPERAND (expr, 1);
2171         if (top_compref
2172             && cst_and_fits_in_hwi (tmp)
2173             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2174           {
2175             HOST_WIDE_INT boffset, abs_off;
2176
2177             /* Strip the component reference completely.  */
2178             op0 = TREE_OPERAND (expr, 0);
2179             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2180             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2181             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2182             if (boffset < 0)
2183               abs_off = -abs_off;
2184
2185             *offset = off0 + int_cst_value (tmp) + abs_off;
2186             return op0;
2187           }
2188       }
2189       break;
2190
2191     case ADDR_EXPR:
2192       op0 = TREE_OPERAND (expr, 0);
2193       op0 = strip_offset_1 (op0, true, true, &off0);
2194       *offset += off0;
2195
2196       if (op0 == TREE_OPERAND (expr, 0))
2197         return orig_expr;
2198
2199       expr = build_fold_addr_expr (op0);
2200       return fold_convert (orig_type, expr);
2201
2202     case MEM_REF:
2203       /* ???  Offset operand?  */
2204       inside_addr = false;
2205       break;
2206
2207     default:
2208       return orig_expr;
2209     }
2210
2211   /* Default handling of expressions for that we want to recurse into
2212      the first operand.  */
2213   op0 = TREE_OPERAND (expr, 0);
2214   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2215   *offset += off0;
2216
2217   if (op0 == TREE_OPERAND (expr, 0)
2218       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2219     return orig_expr;
2220
2221   expr = copy_node (expr);
2222   TREE_OPERAND (expr, 0) = op0;
2223   if (op1)
2224     TREE_OPERAND (expr, 1) = op1;
2225
2226   /* Inside address, we might strip the top level component references,
2227      thus changing type of the expression.  Handling of ADDR_EXPR
2228      will fix that.  */
2229   expr = fold_convert (orig_type, expr);
2230
2231   return expr;
2232 }
2233
2234 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2235
2236 static tree
2237 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2238 {
2239   HOST_WIDE_INT off;
2240   tree core = strip_offset_1 (expr, false, false, &off);
2241   *offset = off;
2242   return core;
2243 }
2244
2245 /* Returns variant of TYPE that can be used as base for different uses.
2246    We return unsigned type with the same precision, which avoids problems
2247    with overflows.  */
2248
2249 static tree
2250 generic_type_for (tree type)
2251 {
2252   if (POINTER_TYPE_P (type))
2253     return unsigned_type_for (type);
2254
2255   if (TYPE_UNSIGNED (type))
2256     return type;
2257
2258   return unsigned_type_for (type);
2259 }
2260
2261 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2262    the bitmap to that we should store it.  */
2263
2264 static struct ivopts_data *fd_ivopts_data;
2265 static tree
2266 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2267 {
2268   bitmap *depends_on = (bitmap *) data;
2269   struct version_info *info;
2270
2271   if (TREE_CODE (*expr_p) != SSA_NAME)
2272     return NULL_TREE;
2273   info = name_info (fd_ivopts_data, *expr_p);
2274
2275   if (!info->inv_id || info->has_nonlin_use)
2276     return NULL_TREE;
2277
2278   if (!*depends_on)
2279     *depends_on = BITMAP_ALLOC (NULL);
2280   bitmap_set_bit (*depends_on, info->inv_id);
2281
2282   return NULL_TREE;
2283 }
2284
2285 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2286    position to POS.  If USE is not NULL, the candidate is set as related to
2287    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2288    replacement of the final value of the iv by a direct computation.  */
2289
2290 static struct iv_cand *
2291 add_candidate_1 (struct ivopts_data *data,
2292                  tree base, tree step, bool important, enum iv_position pos,
2293                  struct iv_use *use, gimple incremented_at)
2294 {
2295   unsigned i;
2296   struct iv_cand *cand = NULL;
2297   tree type, orig_type;
2298
2299   /* For non-original variables, make sure their values are computed in a type
2300      that does not invoke undefined behavior on overflows (since in general,
2301      we cannot prove that these induction variables are non-wrapping).  */
2302   if (pos != IP_ORIGINAL)
2303     {
2304       orig_type = TREE_TYPE (base);
2305       type = generic_type_for (orig_type);
2306       if (type != orig_type)
2307         {
2308           base = fold_convert (type, base);
2309           step = fold_convert (type, step);
2310         }
2311     }
2312
2313   for (i = 0; i < n_iv_cands (data); i++)
2314     {
2315       cand = iv_cand (data, i);
2316
2317       if (cand->pos != pos)
2318         continue;
2319
2320       if (cand->incremented_at != incremented_at
2321           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2322               && cand->ainc_use != use))
2323         continue;
2324
2325       if (!cand->iv)
2326         {
2327           if (!base && !step)
2328             break;
2329
2330           continue;
2331         }
2332
2333       if (!base && !step)
2334         continue;
2335
2336       if (operand_equal_p (base, cand->iv->base, 0)
2337           && operand_equal_p (step, cand->iv->step, 0)
2338           && (TYPE_PRECISION (TREE_TYPE (base))
2339               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2340         break;
2341     }
2342
2343   if (i == n_iv_cands (data))
2344     {
2345       cand = XCNEW (struct iv_cand);
2346       cand->id = i;
2347
2348       if (!base && !step)
2349         cand->iv = NULL;
2350       else
2351         cand->iv = alloc_iv (base, step);
2352
2353       cand->pos = pos;
2354       if (pos != IP_ORIGINAL && cand->iv)
2355         {
2356           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2357           cand->var_after = cand->var_before;
2358         }
2359       cand->important = important;
2360       cand->incremented_at = incremented_at;
2361       data->iv_candidates.safe_push (cand);
2362
2363       if (step
2364           && TREE_CODE (step) != INTEGER_CST)
2365         {
2366           fd_ivopts_data = data;
2367           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2368         }
2369
2370       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2371         cand->ainc_use = use;
2372       else
2373         cand->ainc_use = NULL;
2374
2375       if (dump_file && (dump_flags & TDF_DETAILS))
2376         dump_cand (dump_file, cand);
2377     }
2378
2379   if (important && !cand->important)
2380     {
2381       cand->important = true;
2382       if (dump_file && (dump_flags & TDF_DETAILS))
2383         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2384     }
2385
2386   if (use)
2387     {
2388       bitmap_set_bit (use->related_cands, i);
2389       if (dump_file && (dump_flags & TDF_DETAILS))
2390         fprintf (dump_file, "Candidate %d is related to use %d\n",
2391                  cand->id, use->id);
2392     }
2393
2394   return cand;
2395 }
2396
2397 /* Returns true if incrementing the induction variable at the end of the LOOP
2398    is allowed.
2399
2400    The purpose is to avoid splitting latch edge with a biv increment, thus
2401    creating a jump, possibly confusing other optimization passes and leaving
2402    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2403    is not available (so we do not have a better alternative), or if the latch
2404    edge is already nonempty.  */
2405
2406 static bool
2407 allow_ip_end_pos_p (struct loop *loop)
2408 {
2409   if (!ip_normal_pos (loop))
2410     return true;
2411
2412   if (!empty_block_p (ip_end_pos (loop)))
2413     return true;
2414
2415   return false;
2416 }
2417
2418 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2419    Important field is set to IMPORTANT.  */
2420
2421 static void
2422 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2423                         bool important, struct iv_use *use)
2424 {
2425   basic_block use_bb = gimple_bb (use->stmt);
2426   enum machine_mode mem_mode;
2427   unsigned HOST_WIDE_INT cstepi;
2428
2429   /* If we insert the increment in any position other than the standard
2430      ones, we must ensure that it is incremented once per iteration.
2431      It must not be in an inner nested loop, or one side of an if
2432      statement.  */
2433   if (use_bb->loop_father != data->current_loop
2434       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2435       || stmt_could_throw_p (use->stmt)
2436       || !cst_and_fits_in_hwi (step))
2437     return;
2438
2439   cstepi = int_cst_value (step);
2440
2441   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2442   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2443         || USE_STORE_PRE_INCREMENT (mem_mode))
2444        && GET_MODE_SIZE (mem_mode) == cstepi)
2445       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2446            || USE_STORE_PRE_DECREMENT (mem_mode))
2447           && GET_MODE_SIZE (mem_mode) == -cstepi))
2448     {
2449       enum tree_code code = MINUS_EXPR;
2450       tree new_base;
2451       tree new_step = step;
2452
2453       if (POINTER_TYPE_P (TREE_TYPE (base)))
2454         {
2455           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2456           code = POINTER_PLUS_EXPR;
2457         }
2458       else
2459         new_step = fold_convert (TREE_TYPE (base), new_step);
2460       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2461       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2462                        use->stmt);
2463     }
2464   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2465         || USE_STORE_POST_INCREMENT (mem_mode))
2466        && GET_MODE_SIZE (mem_mode) == cstepi)
2467       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2468            || USE_STORE_POST_DECREMENT (mem_mode))
2469           && GET_MODE_SIZE (mem_mode) == -cstepi))
2470     {
2471       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2472                        use->stmt);
2473     }
2474 }
2475
2476 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2477    position to POS.  If USE is not NULL, the candidate is set as related to
2478    it.  The candidate computation is scheduled on all available positions.  */
2479
2480 static void
2481 add_candidate (struct ivopts_data *data,
2482                tree base, tree step, bool important, struct iv_use *use)
2483 {
2484   if (ip_normal_pos (data->current_loop))
2485     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2486   if (ip_end_pos (data->current_loop)
2487       && allow_ip_end_pos_p (data->current_loop))
2488     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2489
2490   if (use != NULL && use->type == USE_ADDRESS)
2491     add_autoinc_candidates (data, base, step, important, use);
2492 }
2493
2494 /* Adds standard iv candidates.  */
2495
2496 static void
2497 add_standard_iv_candidates (struct ivopts_data *data)
2498 {
2499   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2500
2501   /* The same for a double-integer type if it is still fast enough.  */
2502   if (TYPE_PRECISION
2503         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2504       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2505     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2506                    build_int_cst (long_integer_type_node, 1), true, NULL);
2507
2508   /* The same for a double-integer type if it is still fast enough.  */
2509   if (TYPE_PRECISION
2510         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2511       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2512     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2513                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2514 }
2515
2516
2517 /* Adds candidates bases on the old induction variable IV.  */
2518
2519 static void
2520 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2521 {
2522   gimple phi;
2523   tree def;
2524   struct iv_cand *cand;
2525
2526   add_candidate (data, iv->base, iv->step, true, NULL);
2527
2528   /* The same, but with initial value zero.  */
2529   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2530     add_candidate (data, size_int (0), iv->step, true, NULL);
2531   else
2532     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2533                    iv->step, true, NULL);
2534
2535   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2536   if (gimple_code (phi) == GIMPLE_PHI)
2537     {
2538       /* Additionally record the possibility of leaving the original iv
2539          untouched.  */
2540       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2541       /* Don't add candidate if it's from another PHI node because
2542          it's an affine iv appearing in the form of PEELED_CHREC.  */
2543       phi = SSA_NAME_DEF_STMT (def);
2544       if (gimple_code (phi) != GIMPLE_PHI)
2545         {
2546           cand = add_candidate_1 (data,
2547                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2548                                   SSA_NAME_DEF_STMT (def));
2549           cand->var_before = iv->ssa_name;
2550           cand->var_after = def;
2551         }
2552       else
2553         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2554     }
2555 }
2556
2557 /* Adds candidates based on the old induction variables.  */
2558
2559 static void
2560 add_old_ivs_candidates (struct ivopts_data *data)
2561 {
2562   unsigned i;
2563   struct iv *iv;
2564   bitmap_iterator bi;
2565
2566   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2567     {
2568       iv = ver_info (data, i)->iv;
2569       if (iv && iv->biv_p && !integer_zerop (iv->step))
2570         add_old_iv_candidates (data, iv);
2571     }
2572 }
2573
2574 /* Adds candidates based on the value of the induction variable IV and USE.  */
2575
2576 static void
2577 add_iv_value_candidates (struct ivopts_data *data,
2578                          struct iv *iv, struct iv_use *use)
2579 {
2580   unsigned HOST_WIDE_INT offset;
2581   tree base;
2582   tree basetype;
2583
2584   add_candidate (data, iv->base, iv->step, false, use);
2585
2586   /* The same, but with initial value zero.  Make such variable important,
2587      since it is generic enough so that possibly many uses may be based
2588      on it.  */
2589   basetype = TREE_TYPE (iv->base);
2590   if (POINTER_TYPE_P (basetype))
2591     basetype = sizetype;
2592   add_candidate (data, build_int_cst (basetype, 0),
2593                  iv->step, true, use);
2594
2595   /* Third, try removing the constant offset.  Make sure to even
2596      add a candidate for &a[0] vs. (T *)&a.  */
2597   base = strip_offset (iv->base, &offset);
2598   if (offset
2599       || base != iv->base)
2600     add_candidate (data, base, iv->step, false, use);
2601 }
2602
2603 /* Adds candidates based on the uses.  */
2604
2605 static void
2606 add_derived_ivs_candidates (struct ivopts_data *data)
2607 {
2608   unsigned i;
2609
2610   for (i = 0; i < n_iv_uses (data); i++)
2611     {
2612       struct iv_use *use = iv_use (data, i);
2613
2614       if (!use)
2615         continue;
2616
2617       switch (use->type)
2618         {
2619         case USE_NONLINEAR_EXPR:
2620         case USE_COMPARE:
2621         case USE_ADDRESS:
2622           /* Just add the ivs based on the value of the iv used here.  */
2623           add_iv_value_candidates (data, use->iv, use);
2624           break;
2625
2626         default:
2627           gcc_unreachable ();
2628         }
2629     }
2630 }
2631
2632 /* Record important candidates and add them to related_cands bitmaps
2633    if needed.  */
2634
2635 static void
2636 record_important_candidates (struct ivopts_data *data)
2637 {
2638   unsigned i;
2639   struct iv_use *use;
2640
2641   for (i = 0; i < n_iv_cands (data); i++)
2642     {
2643       struct iv_cand *cand = iv_cand (data, i);
2644
2645       if (cand->important)
2646         bitmap_set_bit (data->important_candidates, i);
2647     }
2648
2649   data->consider_all_candidates = (n_iv_cands (data)
2650                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2651
2652   if (data->consider_all_candidates)
2653     {
2654       /* We will not need "related_cands" bitmaps in this case,
2655          so release them to decrease peak memory consumption.  */
2656       for (i = 0; i < n_iv_uses (data); i++)
2657         {
2658           use = iv_use (data, i);
2659           BITMAP_FREE (use->related_cands);
2660         }
2661     }
2662   else
2663     {
2664       /* Add important candidates to the related_cands bitmaps.  */
2665       for (i = 0; i < n_iv_uses (data); i++)
2666         bitmap_ior_into (iv_use (data, i)->related_cands,
2667                          data->important_candidates);
2668     }
2669 }
2670
2671 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2672    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2673    we allocate a simple list to every use.  */
2674
2675 static void
2676 alloc_use_cost_map (struct ivopts_data *data)
2677 {
2678   unsigned i, size, s;
2679
2680   for (i = 0; i < n_iv_uses (data); i++)
2681     {
2682       struct iv_use *use = iv_use (data, i);
2683
2684       if (data->consider_all_candidates)
2685         size = n_iv_cands (data);
2686       else
2687         {
2688           s = bitmap_count_bits (use->related_cands);
2689
2690           /* Round up to the power of two, so that moduling by it is fast.  */
2691           size = s ? (1 << ceil_log2 (s)) : 1;
2692         }
2693
2694       use->n_map_members = size;
2695       use->cost_map = XCNEWVEC (struct cost_pair, size);
2696     }
2697 }
2698
2699 /* Returns description of computation cost of expression whose runtime
2700    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2701
2702 static comp_cost
2703 new_cost (unsigned runtime, unsigned complexity)
2704 {
2705   comp_cost cost;
2706
2707   cost.cost = runtime;
2708   cost.complexity = complexity;
2709
2710   return cost;
2711 }
2712
2713 /* Adds costs COST1 and COST2.  */
2714
2715 static comp_cost
2716 add_costs (comp_cost cost1, comp_cost cost2)
2717 {
2718   cost1.cost += cost2.cost;
2719   cost1.complexity += cost2.complexity;
2720
2721   return cost1;
2722 }
2723 /* Subtracts costs COST1 and COST2.  */
2724
2725 static comp_cost
2726 sub_costs (comp_cost cost1, comp_cost cost2)
2727 {
2728   cost1.cost -= cost2.cost;
2729   cost1.complexity -= cost2.complexity;
2730
2731   return cost1;
2732 }
2733
2734 /* Returns a negative number if COST1 < COST2, a positive number if
2735    COST1 > COST2, and 0 if COST1 = COST2.  */
2736
2737 static int
2738 compare_costs (comp_cost cost1, comp_cost cost2)
2739 {
2740   if (cost1.cost == cost2.cost)
2741     return cost1.complexity - cost2.complexity;
2742
2743   return cost1.cost - cost2.cost;
2744 }
2745
2746 /* Returns true if COST is infinite.  */
2747
2748 static bool
2749 infinite_cost_p (comp_cost cost)
2750 {
2751   return cost.cost == INFTY;
2752 }
2753
2754 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2755    on invariants DEPENDS_ON and that the value used in expressing it
2756    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2757
2758 static void
2759 set_use_iv_cost (struct ivopts_data *data,
2760                  struct iv_use *use, struct iv_cand *cand,
2761                  comp_cost cost, bitmap depends_on, tree value,
2762                  enum tree_code comp, int inv_expr_id)
2763 {
2764   unsigned i, s;
2765
2766   if (infinite_cost_p (cost))
2767     {
2768       BITMAP_FREE (depends_on);
2769       return;
2770     }
2771
2772   if (data->consider_all_candidates)
2773     {
2774       use->cost_map[cand->id].cand = cand;
2775       use->cost_map[cand->id].cost = cost;
2776       use->cost_map[cand->id].depends_on = depends_on;
2777       use->cost_map[cand->id].value = value;
2778       use->cost_map[cand->id].comp = comp;
2779       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2780       return;
2781     }
2782
2783   /* n_map_members is a power of two, so this computes modulo.  */
2784   s = cand->id & (use->n_map_members - 1);
2785   for (i = s; i < use->n_map_members; i++)
2786     if (!use->cost_map[i].cand)
2787       goto found;
2788   for (i = 0; i < s; i++)
2789     if (!use->cost_map[i].cand)
2790       goto found;
2791
2792   gcc_unreachable ();
2793
2794 found:
2795   use->cost_map[i].cand = cand;
2796   use->cost_map[i].cost = cost;
2797   use->cost_map[i].depends_on = depends_on;
2798   use->cost_map[i].value = value;
2799   use->cost_map[i].comp = comp;
2800   use->cost_map[i].inv_expr_id = inv_expr_id;
2801 }
2802
2803 /* Gets cost of (USE, CANDIDATE) pair.  */
2804
2805 static struct cost_pair *
2806 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2807                  struct iv_cand *cand)
2808 {
2809   unsigned i, s;
2810   struct cost_pair *ret;
2811
2812   if (!cand)
2813     return NULL;
2814
2815   if (data->consider_all_candidates)
2816     {
2817       ret = use->cost_map + cand->id;
2818       if (!ret->cand)
2819         return NULL;
2820
2821       return ret;
2822     }
2823
2824   /* n_map_members is a power of two, so this computes modulo.  */
2825   s = cand->id & (use->n_map_members - 1);
2826   for (i = s; i < use->n_map_members; i++)
2827     if (use->cost_map[i].cand == cand)
2828       return use->cost_map + i;
2829     else if (use->cost_map[i].cand == NULL)
2830       return NULL;
2831   for (i = 0; i < s; i++)
2832     if (use->cost_map[i].cand == cand)
2833       return use->cost_map + i;
2834     else if (use->cost_map[i].cand == NULL)
2835       return NULL;
2836
2837   return NULL;
2838 }
2839
2840 /* Returns estimate on cost of computing SEQ.  */
2841
2842 static unsigned
2843 seq_cost (rtx seq, bool speed)
2844 {
2845   unsigned cost = 0;
2846   rtx set;
2847
2848   for (; seq; seq = NEXT_INSN (seq))
2849     {
2850       set = single_set (seq);
2851       if (set)
2852         cost += set_src_cost (SET_SRC (set), speed);
2853       else
2854         cost++;
2855     }
2856
2857   return cost;
2858 }
2859
2860 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2861 static rtx
2862 produce_memory_decl_rtl (tree obj, int *regno)
2863 {
2864   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2865   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2866   rtx x;
2867
2868   gcc_assert (obj);
2869   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2870     {
2871       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2872       x = gen_rtx_SYMBOL_REF (address_mode, name);
2873       SET_SYMBOL_REF_DECL (x, obj);
2874       x = gen_rtx_MEM (DECL_MODE (obj), x);
2875       set_mem_addr_space (x, as);
2876       targetm.encode_section_info (obj, x, true);
2877     }
2878   else
2879     {
2880       x = gen_raw_REG (address_mode, (*regno)++);
2881       x = gen_rtx_MEM (DECL_MODE (obj), x);
2882       set_mem_addr_space (x, as);
2883     }
2884
2885   return x;
2886 }
2887
2888 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2889    walk_tree.  DATA contains the actual fake register number.  */
2890
2891 static tree
2892 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2893 {
2894   tree obj = NULL_TREE;
2895   rtx x = NULL_RTX;
2896   int *regno = (int *) data;
2897
2898   switch (TREE_CODE (*expr_p))
2899     {
2900     case ADDR_EXPR:
2901       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2902            handled_component_p (*expr_p);
2903            expr_p = &TREE_OPERAND (*expr_p, 0))
2904         continue;
2905       obj = *expr_p;
2906       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2907         x = produce_memory_decl_rtl (obj, regno);
2908       break;
2909
2910     case SSA_NAME:
2911       *ws = 0;
2912       obj = SSA_NAME_VAR (*expr_p);
2913       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2914       if (!obj)
2915         return NULL_TREE;
2916       if (!DECL_RTL_SET_P (obj))
2917         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2918       break;
2919
2920     case VAR_DECL:
2921     case PARM_DECL:
2922     case RESULT_DECL:
2923       *ws = 0;
2924       obj = *expr_p;
2925
2926       if (DECL_RTL_SET_P (obj))
2927         break;
2928
2929       if (DECL_MODE (obj) == BLKmode)
2930         x = produce_memory_decl_rtl (obj, regno);
2931       else
2932         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2933
2934       break;
2935
2936     default:
2937       break;
2938     }
2939
2940   if (x)
2941     {
2942       decl_rtl_to_reset.safe_push (obj);
2943       SET_DECL_RTL (obj, x);
2944     }
2945
2946   return NULL_TREE;
2947 }
2948
2949 /* Determines cost of the computation of EXPR.  */
2950
2951 static unsigned
2952 computation_cost (tree expr, bool speed)
2953 {
2954   rtx seq, rslt;
2955   tree type = TREE_TYPE (expr);
2956   unsigned cost;
2957   /* Avoid using hard regs in ways which may be unsupported.  */
2958   int regno = LAST_VIRTUAL_REGISTER + 1;
2959   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2960   enum node_frequency real_frequency = node->frequency;
2961
2962   node->frequency = NODE_FREQUENCY_NORMAL;
2963   crtl->maybe_hot_insn_p = speed;
2964   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2965   start_sequence ();
2966   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2967   seq = get_insns ();
2968   end_sequence ();
2969   default_rtl_profile ();
2970   node->frequency = real_frequency;
2971
2972   cost = seq_cost (seq, speed);
2973   if (MEM_P (rslt))
2974     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2975                           TYPE_ADDR_SPACE (type), speed);
2976   else if (!REG_P (rslt))
2977     cost += set_src_cost (rslt, speed);
2978
2979   return cost;
2980 }
2981
2982 /* Returns variable containing the value of candidate CAND at statement AT.  */
2983
2984 static tree
2985 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2986 {
2987   if (stmt_after_increment (loop, cand, stmt))
2988     return cand->var_after;
2989   else
2990     return cand->var_before;
2991 }
2992
2993 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2994    same precision that is at least as wide as the precision of TYPE, stores
2995    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2996    type of A and B.  */
2997
2998 static tree
2999 determine_common_wider_type (tree *a, tree *b)
3000 {
3001   tree wider_type = NULL;
3002   tree suba, subb;
3003   tree atype = TREE_TYPE (*a);
3004
3005   if (CONVERT_EXPR_P (*a))
3006     {
3007       suba = TREE_OPERAND (*a, 0);
3008       wider_type = TREE_TYPE (suba);
3009       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3010         return atype;
3011     }
3012   else
3013     return atype;
3014
3015   if (CONVERT_EXPR_P (*b))
3016     {
3017       subb = TREE_OPERAND (*b, 0);
3018       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3019         return atype;
3020     }
3021   else
3022     return atype;
3023
3024   *a = suba;
3025   *b = subb;
3026   return wider_type;
3027 }
3028
3029 /* Determines the expression by that USE is expressed from induction variable
3030    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3031    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3032
3033 static bool
3034 get_computation_aff (struct loop *loop,
3035                      struct iv_use *use, struct iv_cand *cand, gimple at,
3036                      struct aff_tree *aff)
3037 {
3038   tree ubase = use->iv->base;
3039   tree ustep = use->iv->step;
3040   tree cbase = cand->iv->base;
3041   tree cstep = cand->iv->step, cstep_common;
3042   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3043   tree common_type, var;
3044   tree uutype;
3045   aff_tree cbase_aff, var_aff;
3046   widest_int rat;
3047
3048   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3049     {
3050       /* We do not have a precision to express the values of use.  */
3051       return false;
3052     }
3053
3054   var = var_at_stmt (loop, cand, at);
3055   uutype = unsigned_type_for (utype);
3056
3057   /* If the conversion is not noop, perform it.  */
3058   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3059     {
3060       cstep = fold_convert (uutype, cstep);
3061       cbase = fold_convert (uutype, cbase);
3062       var = fold_convert (uutype, var);
3063     }
3064
3065   if (!constant_multiple_of (ustep, cstep, &rat))
3066     return false;
3067
3068   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3069      type, we achieve better folding by computing their difference in this
3070      wider type, and cast the result to UUTYPE.  We do not need to worry about
3071      overflows, as all the arithmetics will in the end be performed in UUTYPE
3072      anyway.  */
3073   common_type = determine_common_wider_type (&ubase, &cbase);
3074
3075   /* use = ubase - ratio * cbase + ratio * var.  */
3076   tree_to_aff_combination (ubase, common_type, aff);
3077   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3078   tree_to_aff_combination (var, uutype, &var_aff);
3079
3080   /* We need to shift the value if we are after the increment.  */
3081   if (stmt_after_increment (loop, cand, at))
3082     {
3083       aff_tree cstep_aff;
3084
3085       if (common_type != uutype)
3086         cstep_common = fold_convert (common_type, cstep);
3087       else
3088         cstep_common = cstep;
3089
3090       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3091       aff_combination_add (&cbase_aff, &cstep_aff);
3092     }
3093
3094   aff_combination_scale (&cbase_aff, -rat);
3095   aff_combination_add (aff, &cbase_aff);
3096   if (common_type != uutype)
3097     aff_combination_convert (aff, uutype);
3098
3099   aff_combination_scale (&var_aff, rat);
3100   aff_combination_add (aff, &var_aff);
3101
3102   return true;
3103 }
3104
3105 /* Return the type of USE.  */
3106
3107 static tree
3108 get_use_type (struct iv_use *use)
3109 {
3110   tree base_type = TREE_TYPE (use->iv->base);
3111   tree type;
3112
3113   if (use->type == USE_ADDRESS)
3114     {
3115       /* The base_type may be a void pointer.  Create a pointer type based on
3116          the mem_ref instead.  */
3117       type = build_pointer_type (TREE_TYPE (*use->op_p));
3118       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3119                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3120     }
3121   else
3122     type = base_type;
3123
3124   return type;
3125 }
3126
3127 /* Determines the expression by that USE is expressed from induction variable
3128    CAND at statement AT in LOOP.  The computation is unshared.  */
3129
3130 static tree
3131 get_computation_at (struct loop *loop,
3132                     struct iv_use *use, struct iv_cand *cand, gimple at)
3133 {
3134   aff_tree aff;
3135   tree type = get_use_type (use);
3136
3137   if (!get_computation_aff (loop, use, cand, at, &aff))
3138     return NULL_TREE;
3139   unshare_aff_combination (&aff);
3140   return fold_convert (type, aff_combination_to_tree (&aff));
3141 }
3142
3143 /* Determines the expression by that USE is expressed from induction variable
3144    CAND in LOOP.  The computation is unshared.  */
3145
3146 static tree
3147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3148 {
3149   return get_computation_at (loop, use, cand, use->stmt);
3150 }
3151
3152 /* Adjust the cost COST for being in loop setup rather than loop body.
3153    If we're optimizing for space, the loop setup overhead is constant;
3154    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3155 static unsigned
3156 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3157 {
3158   if (cost == INFTY)
3159     return cost;
3160   else if (optimize_loop_for_speed_p (data->current_loop))
3161     return cost / avg_loop_niter (data->current_loop);
3162   else
3163     return cost;
3164 }
3165
3166 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3167    validity for a memory reference accessing memory of mode MODE in
3168    address space AS.  */
3169
3170
3171 bool
3172 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3173                                  addr_space_t as)
3174 {
3175 #define MAX_RATIO 128
3176   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3177   static vec<sbitmap> valid_mult_list;
3178   sbitmap valid_mult;
3179
3180   if (data_index >= valid_mult_list.length ())
3181     valid_mult_list.safe_grow_cleared (data_index + 1);
3182
3183   valid_mult = valid_mult_list[data_index];
3184   if (!valid_mult)
3185     {
3186       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3187       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3188       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3189       rtx addr, scaled;
3190       HOST_WIDE_INT i;
3191
3192       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3193       bitmap_clear (valid_mult);
3194       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3195       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3196       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3197         {
3198           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3199           if (memory_address_addr_space_p (mode, addr, as)
3200               || memory_address_addr_space_p (mode, scaled, as))
3201             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3202         }
3203
3204       if (dump_file && (dump_flags & TDF_DETAILS))
3205         {
3206           fprintf (dump_file, "  allowed multipliers:");
3207           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3208             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3209               fprintf (dump_file, " %d", (int) i);
3210           fprintf (dump_file, "\n");
3211           fprintf (dump_file, "\n");
3212         }
3213
3214       valid_mult_list[data_index] = valid_mult;
3215     }
3216
3217   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3218     return false;
3219
3220   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3221 }
3222
3223 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3224    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3225    variable is omitted.  Compute the cost for a memory reference that accesses
3226    a memory location of mode MEM_MODE in address space AS.
3227
3228    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229    size of MEM_MODE / RATIO) is available.  To make this determination, we
3230    look at the size of the increment to be made, which is given in CSTEP.
3231    CSTEP may be zero if the step is unknown.
3232    STMT_AFTER_INC is true iff the statement we're looking at is after the
3233    increment of the original biv.
3234
3235    TODO -- there must be some better way.  This all is quite crude.  */
3236
3237 enum ainc_type
3238 {
3239   AINC_PRE_INC,         /* Pre increment.  */
3240   AINC_PRE_DEC,         /* Pre decrement.  */
3241   AINC_POST_INC,        /* Post increment.  */
3242   AINC_POST_DEC,        /* Post decrement.  */
3243   AINC_NONE             /* Also the number of auto increment types.  */
3244 };
3245
3246 typedef struct address_cost_data_s
3247 {
3248   HOST_WIDE_INT min_offset, max_offset;
3249   unsigned costs[2][2][2][2];
3250   unsigned ainc_costs[AINC_NONE];
3251 } *address_cost_data;
3252
3253
3254 static comp_cost
3255 get_address_cost (bool symbol_present, bool var_present,
3256                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3257                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3258                   addr_space_t as, bool speed,
3259                   bool stmt_after_inc, bool *may_autoinc)
3260 {
3261   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3262   static vec<address_cost_data> address_cost_data_list;
3263   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3264   address_cost_data data;
3265   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3266   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3267   unsigned cost, acost, complexity;
3268   enum ainc_type autoinc_type;
3269   bool offset_p, ratio_p, autoinc;
3270   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3271   unsigned HOST_WIDE_INT mask;
3272   unsigned bits;
3273
3274   if (data_index >= address_cost_data_list.length ())
3275     address_cost_data_list.safe_grow_cleared (data_index + 1);
3276
3277   data = address_cost_data_list[data_index];
3278   if (!data)
3279     {
3280       HOST_WIDE_INT i;
3281       HOST_WIDE_INT rat, off = 0;
3282       int old_cse_not_expected, width;
3283       unsigned sym_p, var_p, off_p, rat_p, add_c;
3284       rtx seq, addr, base;
3285       rtx reg0, reg1;
3286
3287       data = (address_cost_data) xcalloc (1, sizeof (*data));
3288
3289       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3290
3291       width = GET_MODE_BITSIZE (address_mode) - 1;
3292       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3293         width = HOST_BITS_PER_WIDE_INT - 1;
3294       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3295
3296       for (i = width; i >= 0; i--)
3297         {
3298           off = -((unsigned HOST_WIDE_INT) 1 << i);
3299           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3300           if (memory_address_addr_space_p (mem_mode, addr, as))
3301             break;
3302         }
3303       data->min_offset = (i == -1? 0 : off);
3304
3305       for (i = width; i >= 0; i--)
3306         {
3307           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3308           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3309           if (memory_address_addr_space_p (mem_mode, addr, as))
3310             break;
3311         }
3312       if (i == -1)
3313         off = 0;
3314       data->max_offset = off;
3315
3316       if (dump_file && (dump_flags & TDF_DETAILS))
3317         {
3318           fprintf (dump_file, "get_address_cost:\n");
3319           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3320                    GET_MODE_NAME (mem_mode),
3321                    data->min_offset);
3322           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3323                    GET_MODE_NAME (mem_mode),
3324                    data->max_offset);
3325         }
3326
3327       rat = 1;
3328       for (i = 2; i <= MAX_RATIO; i++)
3329         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3330           {
3331             rat = i;
3332             break;
3333           }
3334
3335       /* Compute the cost of various addressing modes.  */
3336       acost = 0;
3337       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3338       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3339
3340       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3341           || USE_STORE_PRE_DECREMENT (mem_mode))
3342         {
3343           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3344           has_predec[mem_mode]
3345             = memory_address_addr_space_p (mem_mode, addr, as);
3346
3347           if (has_predec[mem_mode])
3348             data->ainc_costs[AINC_PRE_DEC]
3349               = address_cost (addr, mem_mode, as, speed);
3350         }
3351       if (USE_LOAD_POST_DECREMENT (mem_mode)
3352           || USE_STORE_POST_DECREMENT (mem_mode))
3353         {
3354           addr = gen_rtx_POST_DEC (address_mode, reg0);
3355           has_postdec[mem_mode]
3356             = memory_address_addr_space_p (mem_mode, addr, as);
3357
3358           if (has_postdec[mem_mode])
3359             data->ainc_costs[AINC_POST_DEC]
3360               = address_cost (addr, mem_mode, as, speed);
3361         }
3362       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3363           || USE_STORE_PRE_DECREMENT (mem_mode))
3364         {
3365           addr = gen_rtx_PRE_INC (address_mode, reg0);
3366           has_preinc[mem_mode]
3367             = memory_address_addr_space_p (mem_mode, addr, as);
3368
3369           if (has_preinc[mem_mode])
3370             data->ainc_costs[AINC_PRE_INC]
3371               = address_cost (addr, mem_mode, as, speed);
3372         }
3373       if (USE_LOAD_POST_INCREMENT (mem_mode)
3374           || USE_STORE_POST_INCREMENT (mem_mode))
3375         {
3376           addr = gen_rtx_POST_INC (address_mode, reg0);
3377           has_postinc[mem_mode]
3378             = memory_address_addr_space_p (mem_mode, addr, as);
3379
3380           if (has_postinc[mem_mode])
3381             data->ainc_costs[AINC_POST_INC]
3382               = address_cost (addr, mem_mode, as, speed);
3383         }
3384       for (i = 0; i < 16; i++)
3385         {
3386           sym_p = i & 1;
3387           var_p = (i >> 1) & 1;
3388           off_p = (i >> 2) & 1;
3389           rat_p = (i >> 3) & 1;
3390
3391           addr = reg0;
3392           if (rat_p)
3393             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3394                                    gen_int_mode (rat, address_mode));
3395
3396           if (var_p)
3397             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3398
3399           if (sym_p)
3400             {
3401               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3402               /* ??? We can run into trouble with some backends by presenting
3403                  it with symbols which haven't been properly passed through
3404                  targetm.encode_section_info.  By setting the local bit, we
3405                  enhance the probability of things working.  */
3406               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3407
3408               if (off_p)
3409                 base = gen_rtx_fmt_e (CONST, address_mode,
3410                                       gen_rtx_fmt_ee
3411                                         (PLUS, address_mode, base,
3412                                          gen_int_mode (off, address_mode)));
3413             }
3414           else if (off_p)
3415             base = gen_int_mode (off, address_mode);
3416           else
3417             base = NULL_RTX;
3418
3419           if (base)
3420             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3421
3422           start_sequence ();
3423           /* To avoid splitting addressing modes, pretend that no cse will
3424              follow.  */
3425           old_cse_not_expected = cse_not_expected;
3426           cse_not_expected = true;
3427           addr = memory_address_addr_space (mem_mode, addr, as);
3428           cse_not_expected = old_cse_not_expected;
3429           seq = get_insns ();
3430           end_sequence ();
3431
3432           acost = seq_cost (seq, speed);
3433           acost += address_cost (addr, mem_mode, as, speed);
3434
3435           if (!acost)
3436             acost = 1;
3437           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3438         }
3439
3440       /* On some targets, it is quite expensive to load symbol to a register,
3441          which makes addresses that contain symbols look much more expensive.
3442          However, the symbol will have to be loaded in any case before the
3443          loop (and quite likely we have it in register already), so it does not
3444          make much sense to penalize them too heavily.  So make some final
3445          tweaks for the SYMBOL_PRESENT modes:
3446
3447          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3448          var is cheaper, use this mode with small penalty.
3449          If VAR_PRESENT is true, try whether the mode with
3450          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3451          if this is the case, use it.  */
3452       add_c = add_cost (speed, address_mode);
3453       for (i = 0; i < 8; i++)
3454         {
3455           var_p = i & 1;
3456           off_p = (i >> 1) & 1;
3457           rat_p = (i >> 2) & 1;
3458
3459           acost = data->costs[0][1][off_p][rat_p] + 1;
3460           if (var_p)
3461             acost += add_c;
3462
3463           if (acost < data->costs[1][var_p][off_p][rat_p])
3464             data->costs[1][var_p][off_p][rat_p] = acost;
3465         }
3466
3467       if (dump_file && (dump_flags & TDF_DETAILS))
3468         {
3469           fprintf (dump_file, "Address costs:\n");
3470
3471           for (i = 0; i < 16; i++)
3472             {
3473               sym_p = i & 1;
3474               var_p = (i >> 1) & 1;
3475               off_p = (i >> 2) & 1;
3476               rat_p = (i >> 3) & 1;
3477
3478               fprintf (dump_file, "  ");
3479               if (sym_p)
3480                 fprintf (dump_file, "sym + ");
3481               if (var_p)
3482                 fprintf (dump_file, "var + ");
3483               if (off_p)
3484                 fprintf (dump_file, "cst + ");
3485               if (rat_p)
3486                 fprintf (dump_file, "rat * ");
3487
3488               acost = data->costs[sym_p][var_p][off_p][rat_p];
3489               fprintf (dump_file, "index costs %d\n", acost);
3490             }
3491           if (has_predec[mem_mode] || has_postdec[mem_mode]
3492               || has_preinc[mem_mode] || has_postinc[mem_mode])
3493             fprintf (dump_file, "  May include autoinc/dec\n");
3494           fprintf (dump_file, "\n");
3495         }
3496
3497       address_cost_data_list[data_index] = data;
3498     }
3499
3500   bits = GET_MODE_BITSIZE (address_mode);
3501   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3502   offset &= mask;
3503   if ((offset >> (bits - 1) & 1))
3504     offset |= ~mask;
3505   s_offset = offset;
3506
3507   autoinc = false;
3508   autoinc_type = AINC_NONE;
3509   msize = GET_MODE_SIZE (mem_mode);
3510   autoinc_offset = offset;
3511   if (stmt_after_inc)
3512     autoinc_offset += ratio * cstep;
3513   if (symbol_present || var_present || ratio != 1)
3514     autoinc = false;
3515   else
3516     {
3517       if (has_postinc[mem_mode] && autoinc_offset == 0
3518           && msize == cstep)
3519         autoinc_type = AINC_POST_INC;
3520       else if (has_postdec[mem_mode] && autoinc_offset == 0
3521                && msize == -cstep)
3522         autoinc_type = AINC_POST_DEC;
3523       else if (has_preinc[mem_mode] && autoinc_offset == msize
3524                && msize == cstep)
3525         autoinc_type = AINC_PRE_INC;
3526       else if (has_predec[mem_mode] && autoinc_offset == -msize
3527                && msize == -cstep)
3528         autoinc_type = AINC_PRE_DEC;
3529
3530       if (autoinc_type != AINC_NONE)
3531         autoinc = true;
3532     }
3533
3534   cost = 0;
3535   offset_p = (s_offset != 0
3536               && data->min_offset <= s_offset
3537               && s_offset <= data->max_offset);
3538   ratio_p = (ratio != 1
3539              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3540
3541   if (ratio != 1 && !ratio_p)
3542     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3543
3544   if (s_offset && !offset_p && !symbol_present)
3545     cost += add_cost (speed, address_mode);
3546
3547   if (may_autoinc)
3548     *may_autoinc = autoinc;
3549   if (autoinc)
3550     acost = data->ainc_costs[autoinc_type];
3551   else
3552     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3553   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3554   return new_cost (cost + acost, complexity);
3555 }
3556
3557  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3558     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3559     calculating the operands of EXPR.  Returns true if successful, and returns
3560     the cost in COST.  */
3561
3562 static bool
3563 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3564                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3565 {
3566   comp_cost res;
3567   tree op1 = TREE_OPERAND (expr, 1);
3568   tree cst = TREE_OPERAND (mult, 1);
3569   tree multop = TREE_OPERAND (mult, 0);
3570   int m = exact_log2 (int_cst_value (cst));
3571   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3572   int sa_cost;
3573   bool equal_p = false;
3574
3575   if (!(m >= 0 && m < maxm))
3576     return false;
3577
3578   if (operand_equal_p (op1, mult, 0))
3579     equal_p = true;
3580
3581   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3582              ? shiftadd_cost (speed, mode, m)
3583              : (equal_p
3584                 ? shiftsub1_cost (speed, mode, m)
3585                 : shiftsub0_cost (speed, mode, m)));
3586   res = new_cost (sa_cost, 0);
3587   res = add_costs (res, equal_p ? cost0 : cost1);
3588
3589   STRIP_NOPS (multop);
3590   if (!is_gimple_val (multop))
3591     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3592
3593   *cost = res;
3594   return true;
3595 }
3596
3597 /* Estimates cost of forcing expression EXPR into a variable.  */
3598
3599 static comp_cost
3600 force_expr_to_var_cost (tree expr, bool speed)
3601 {
3602   static bool costs_initialized = false;
3603   static unsigned integer_cost [2];
3604   static unsigned symbol_cost [2];
3605   static unsigned address_cost [2];
3606   tree op0, op1;
3607   comp_cost cost0, cost1, cost;
3608   enum machine_mode mode;
3609
3610   if (!costs_initialized)
3611     {
3612       tree type = build_pointer_type (integer_type_node);
3613       tree var, addr;
3614       rtx x;
3615       int i;
3616
3617       var = create_tmp_var_raw (integer_type_node, "test_var");
3618       TREE_STATIC (var) = 1;
3619       x = produce_memory_decl_rtl (var, NULL);
3620       SET_DECL_RTL (var, x);
3621
3622       addr = build1 (ADDR_EXPR, type, var);
3623
3624
3625       for (i = 0; i < 2; i++)
3626         {
3627           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3628                                                              2000), i);
3629
3630           symbol_cost[i] = computation_cost (addr, i) + 1;
3631
3632           address_cost[i]
3633             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3634           if (dump_file && (dump_flags & TDF_DETAILS))
3635             {
3636               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3637               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3638               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3639               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3640               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3641               fprintf (dump_file, "\n");
3642             }
3643         }
3644
3645       costs_initialized = true;
3646     }
3647
3648   STRIP_NOPS (expr);
3649
3650   if (SSA_VAR_P (expr))
3651     return no_cost;
3652
3653   if (is_gimple_min_invariant (expr))
3654     {
3655       if (TREE_CODE (expr) == INTEGER_CST)
3656         return new_cost (integer_cost [speed], 0);
3657
3658       if (TREE_CODE (expr) == ADDR_EXPR)
3659         {
3660           tree obj = TREE_OPERAND (expr, 0);
3661
3662           if (TREE_CODE (obj) == VAR_DECL
3663               || TREE_CODE (obj) == PARM_DECL
3664               || TREE_CODE (obj) == RESULT_DECL)
3665             return new_cost (symbol_cost [speed], 0);
3666         }
3667
3668       return new_cost (address_cost [speed], 0);
3669     }
3670
3671   switch (TREE_CODE (expr))
3672     {
3673     case POINTER_PLUS_EXPR:
3674     case PLUS_EXPR:
3675     case MINUS_EXPR:
3676     case MULT_EXPR:
3677       op0 = TREE_OPERAND (expr, 0);
3678       op1 = TREE_OPERAND (expr, 1);
3679       STRIP_NOPS (op0);
3680       STRIP_NOPS (op1);
3681       break;
3682
3683     CASE_CONVERT:
3684     case NEGATE_EXPR:
3685       op0 = TREE_OPERAND (expr, 0);
3686       STRIP_NOPS (op0);
3687       op1 = NULL_TREE;
3688       break;
3689
3690     default:
3691       /* Just an arbitrary value, FIXME.  */
3692       return new_cost (target_spill_cost[speed], 0);
3693     }
3694
3695   if (op0 == NULL_TREE
3696       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3697     cost0 = no_cost;
3698   else
3699     cost0 = force_expr_to_var_cost (op0, speed);
3700
3701   if (op1 == NULL_TREE
3702       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3703     cost1 = no_cost;
3704   else
3705     cost1 = force_expr_to_var_cost (op1, speed);
3706
3707   mode = TYPE_MODE (TREE_TYPE (expr));
3708   switch (TREE_CODE (expr))
3709     {
3710     case POINTER_PLUS_EXPR:
3711     case PLUS_EXPR:
3712     case MINUS_EXPR:
3713     case NEGATE_EXPR:
3714       cost = new_cost (add_cost (speed, mode), 0);
3715       if (TREE_CODE (expr) != NEGATE_EXPR)
3716         {
3717           tree mult = NULL_TREE;
3718           comp_cost sa_cost;
3719           if (TREE_CODE (op1) == MULT_EXPR)
3720             mult = op1;
3721           else if (TREE_CODE (op0) == MULT_EXPR)
3722             mult = op0;
3723
3724           if (mult != NULL_TREE
3725               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3726               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3727                                     speed, &sa_cost))
3728             return sa_cost;
3729         }
3730       break;
3731
3732     CASE_CONVERT:
3733       {
3734         tree inner_mode, outer_mode;
3735         outer_mode = TREE_TYPE (expr);
3736         inner_mode = TREE_TYPE (op0);
3737         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3738                                        TYPE_MODE (inner_mode), speed), 0);
3739       }
3740       break;
3741
3742     case MULT_EXPR:
3743       if (cst_and_fits_in_hwi (op0))
3744         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3745                                              mode, speed), 0);
3746       else if (cst_and_fits_in_hwi (op1))
3747         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3748                                              mode, speed), 0);
3749       else
3750         return new_cost (target_spill_cost [speed], 0);
3751       break;
3752
3753     default:
3754       gcc_unreachable ();
3755     }
3756
3757   cost = add_costs (cost, cost0);
3758   cost = add_costs (cost, cost1);
3759
3760   /* Bound the cost by target_spill_cost.  The parts of complicated
3761      computations often are either loop invariant or at least can
3762      be shared between several iv uses, so letting this grow without
3763      limits would not give reasonable results.  */
3764   if (cost.cost > (int) target_spill_cost [speed])
3765     cost.cost = target_spill_cost [speed];
3766
3767   return cost;
3768 }
3769
3770 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3771    invariants the computation depends on.  */
3772
3773 static comp_cost
3774 force_var_cost (struct ivopts_data *data,
3775                 tree expr, bitmap *depends_on)
3776 {
3777   if (depends_on)
3778     {
3779       fd_ivopts_data = data;
3780       walk_tree (&expr, find_depends, depends_on, NULL);
3781     }
3782
3783   return force_expr_to_var_cost (expr, data->speed);
3784 }
3785
3786 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3787    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3788    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3789    invariants the computation depends on.  */
3790
3791 static comp_cost
3792 split_address_cost (struct ivopts_data *data,
3793                     tree addr, bool *symbol_present, bool *var_present,
3794                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3795 {
3796   tree core;
3797   HOST_WIDE_INT bitsize;
3798   HOST_WIDE_INT bitpos;
3799   tree toffset;
3800   enum machine_mode mode;
3801   int unsignedp, volatilep;
3802
3803   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3804                               &unsignedp, &volatilep, false);
3805
3806   if (toffset != 0
3807       || bitpos % BITS_PER_UNIT != 0
3808       || TREE_CODE (core) != VAR_DECL)
3809     {
3810       *symbol_present = false;
3811       *var_present = true;
3812       fd_ivopts_data = data;
3813       walk_tree (&addr, find_depends, depends_on, NULL);
3814       return new_cost (target_spill_cost[data->speed], 0);
3815     }
3816
3817   *offset += bitpos / BITS_PER_UNIT;
3818   if (TREE_STATIC (core)
3819       || DECL_EXTERNAL (core))
3820     {
3821       *symbol_present = true;
3822       *var_present = false;
3823       return no_cost;
3824     }
3825
3826   *symbol_present = false;
3827   *var_present = true;
3828   return no_cost;
3829 }
3830
3831 /* Estimates cost of expressing difference of addresses E1 - E2 as
3832    var + symbol + offset.  The value of offset is added to OFFSET,
3833    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3834    part is missing.  DEPENDS_ON is a set of the invariants the computation
3835    depends on.  */
3836
3837 static comp_cost
3838 ptr_difference_cost (struct ivopts_data *data,
3839                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3840                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3841 {
3842   HOST_WIDE_INT diff = 0;
3843   aff_tree aff_e1, aff_e2;
3844   tree type;
3845
3846   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3847
3848   if (ptr_difference_const (e1, e2, &diff))
3849     {
3850       *offset += diff;
3851       *symbol_present = false;
3852       *var_present = false;
3853       return no_cost;
3854     }
3855
3856   if (integer_zerop (e2))
3857     return split_address_cost (data, TREE_OPERAND (e1, 0),
3858                                symbol_present, var_present, offset, depends_on);
3859
3860   *symbol_present = false;
3861   *var_present = true;
3862
3863   type = signed_type_for (TREE_TYPE (e1));
3864   tree_to_aff_combination (e1, type, &aff_e1);
3865   tree_to_aff_combination (e2, type, &aff_e2);
3866   aff_combination_scale (&aff_e2, -1);
3867   aff_combination_add (&aff_e1, &aff_e2);
3868
3869   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3870 }
3871
3872 /* Estimates cost of expressing difference E1 - E2 as
3873    var + symbol + offset.  The value of offset is added to OFFSET,
3874    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3875    part is missing.  DEPENDS_ON is a set of the invariants the computation
3876    depends on.  */
3877
3878 static comp_cost
3879 difference_cost (struct ivopts_data *data,
3880                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3881                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3882 {
3883   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3884   unsigned HOST_WIDE_INT off1, off2;
3885   aff_tree aff_e1, aff_e2;
3886   tree type;
3887
3888   e1 = strip_offset (e1, &off1);
3889   e2 = strip_offset (e2, &off2);
3890   *offset += off1 - off2;
3891
3892   STRIP_NOPS (e1);
3893   STRIP_NOPS (e2);
3894
3895   if (TREE_CODE (e1) == ADDR_EXPR)
3896     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3897                                 offset, depends_on);
3898   *symbol_present = false;
3899
3900   if (operand_equal_p (e1, e2, 0))
3901     {
3902       *var_present = false;
3903       return no_cost;
3904     }
3905
3906   *var_present = true;
3907
3908   if (integer_zerop (e2))
3909     return force_var_cost (data, e1, depends_on);
3910
3911   if (integer_zerop (e1))
3912     {
3913       comp_cost cost = force_var_cost (data, e2, depends_on);
3914       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3915       return cost;
3916     }
3917
3918   type = signed_type_for (TREE_TYPE (e1));
3919   tree_to_aff_combination (e1, type, &aff_e1);
3920   tree_to_aff_combination (e2, type, &aff_e2);
3921   aff_combination_scale (&aff_e2, -1);
3922   aff_combination_add (&aff_e1, &aff_e2);
3923
3924   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3925 }
3926
3927 /* Returns true if AFF1 and AFF2 are identical.  */
3928
3929 static bool
3930 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3931 {
3932   unsigned i;
3933
3934   if (aff1->n != aff2->n)
3935     return false;
3936
3937   for (i = 0; i < aff1->n; i++)
3938     {
3939       if (aff1->elts[i].coef != aff2->elts[i].coef)
3940         return false;
3941
3942       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3943         return false;
3944     }
3945   return true;
3946 }
3947
3948 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3949
3950 static int
3951 get_expr_id (struct ivopts_data *data, tree expr)
3952 {
3953   struct iv_inv_expr_ent ent;
3954   struct iv_inv_expr_ent **slot;
3955
3956   ent.expr = expr;
3957   ent.hash = iterative_hash_expr (expr, 0);
3958   slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3959   if (*slot)
3960     return (*slot)->id;
3961
3962   *slot = XNEW (struct iv_inv_expr_ent);
3963   (*slot)->expr = expr;
3964   (*slot)->hash = ent.hash;
3965   (*slot)->id = data->inv_expr_id++;
3966   return (*slot)->id;
3967 }
3968
3969 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3970    requires a new compiler generated temporary.  Returns -1 otherwise.
3971    ADDRESS_P is a flag indicating if the expression is for address
3972    computation.  */
3973
3974 static int
3975 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3976                             tree cbase, HOST_WIDE_INT ratio,
3977                             bool address_p)
3978 {
3979   aff_tree ubase_aff, cbase_aff;
3980   tree expr, ub, cb;
3981
3982   STRIP_NOPS (ubase);
3983   STRIP_NOPS (cbase);
3984   ub = ubase;
3985   cb = cbase;
3986
3987   if ((TREE_CODE (ubase) == INTEGER_CST)
3988       && (TREE_CODE (cbase) == INTEGER_CST))
3989     return -1;
3990
3991   /* Strips the constant part. */
3992   if (TREE_CODE (ubase) == PLUS_EXPR
3993       || TREE_CODE (ubase) == MINUS_EXPR
3994       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3995     {
3996       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3997         ubase = TREE_OPERAND (ubase, 0);
3998     }
3999
4000   /* Strips the constant part. */
4001   if (TREE_CODE (cbase) == PLUS_EXPR
4002       || TREE_CODE (cbase) == MINUS_EXPR
4003       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4004     {
4005       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4006         cbase = TREE_OPERAND (cbase, 0);
4007     }
4008
4009   if (address_p)
4010     {
4011       if (((TREE_CODE (ubase) == SSA_NAME)
4012            || (TREE_CODE (ubase) == ADDR_EXPR
4013                && is_gimple_min_invariant (ubase)))
4014           && (TREE_CODE (cbase) == INTEGER_CST))
4015         return -1;
4016
4017       if (((TREE_CODE (cbase) == SSA_NAME)
4018            || (TREE_CODE (cbase) == ADDR_EXPR
4019                && is_gimple_min_invariant (cbase)))
4020           && (TREE_CODE (ubase) == INTEGER_CST))
4021         return -1;
4022     }
4023
4024   if (ratio == 1)
4025     {
4026       if (operand_equal_p (ubase, cbase, 0))
4027         return -1;
4028
4029       if (TREE_CODE (ubase) == ADDR_EXPR
4030           && TREE_CODE (cbase) == ADDR_EXPR)
4031         {
4032           tree usym, csym;
4033
4034           usym = TREE_OPERAND (ubase, 0);
4035           csym = TREE_OPERAND (cbase, 0);
4036           if (TREE_CODE (usym) == ARRAY_REF)
4037             {
4038               tree ind = TREE_OPERAND (usym, 1);
4039               if (TREE_CODE (ind) == INTEGER_CST
4040                   && tree_fits_shwi_p (ind)
4041                   && tree_to_shwi (ind) == 0)
4042                 usym = TREE_OPERAND (usym, 0);
4043             }
4044           if (TREE_CODE (csym) == ARRAY_REF)
4045             {
4046               tree ind = TREE_OPERAND (csym, 1);
4047               if (TREE_CODE (ind) == INTEGER_CST
4048                   && tree_fits_shwi_p (ind)
4049                   && tree_to_shwi (ind) == 0)
4050                 csym = TREE_OPERAND (csym, 0);
4051             }
4052           if (operand_equal_p (usym, csym, 0))
4053             return -1;
4054         }
4055       /* Now do more complex comparison  */
4056       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4057       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4058       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4059         return -1;
4060     }
4061
4062   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4063   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4064
4065   aff_combination_scale (&cbase_aff, -1 * ratio);
4066   aff_combination_add (&ubase_aff, &cbase_aff);
4067   expr = aff_combination_to_tree (&ubase_aff);
4068   return get_expr_id (data, expr);
4069 }
4070
4071
4072
4073 /* Determines the cost of the computation by that USE is expressed
4074    from induction variable CAND.  If ADDRESS_P is true, we just need
4075    to create an address from it, otherwise we want to get it into
4076    register.  A set of invariants we depend on is stored in
4077    DEPENDS_ON.  AT is the statement at that the value is computed.
4078    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4079    addressing is likely.  */
4080
4081 static comp_cost
4082 get_computation_cost_at (struct ivopts_data *data,
4083                          struct iv_use *use, struct iv_cand *cand,
4084                          bool address_p, bitmap *depends_on, gimple at,
4085                          bool *can_autoinc,
4086                          int *inv_expr_id)
4087 {
4088   tree ubase = use->iv->base, ustep = use->iv->step;
4089   tree cbase, cstep;
4090   tree utype = TREE_TYPE (ubase), ctype;
4091   unsigned HOST_WIDE_INT cstepi, offset = 0;
4092   HOST_WIDE_INT ratio, aratio;
4093   bool var_present, symbol_present, stmt_is_after_inc;
4094   comp_cost cost;
4095   widest_int rat;
4096   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4097   enum machine_mode mem_mode = (address_p
4098                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4099                                 : VOIDmode);
4100
4101   *depends_on = NULL;
4102
4103   /* Only consider real candidates.  */
4104   if (!cand->iv)
4105     return infinite_cost;
4106
4107   cbase = cand->iv->base;
4108   cstep = cand->iv->step;
4109   ctype = TREE_TYPE (cbase);
4110
4111   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4112     {
4113       /* We do not have a precision to express the values of use.  */
4114       return infinite_cost;
4115     }
4116
4117   if (address_p
4118       || (use->iv->base_object
4119           && cand->iv->base_object
4120           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4121           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4122     {
4123       /* Do not try to express address of an object with computation based
4124          on address of a different object.  This may cause problems in rtl
4125          level alias analysis (that does not expect this to be happening,
4126          as this is illegal in C), and would be unlikely to be useful
4127          anyway.  */
4128       if (use->iv->base_object
4129           && cand->iv->base_object
4130           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4131         return infinite_cost;
4132     }
4133
4134   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4135     {
4136       /* TODO -- add direct handling of this case.  */
4137       goto fallback;
4138     }
4139
4140   /* CSTEPI is removed from the offset in case statement is after the
4141      increment.  If the step is not constant, we use zero instead.
4142      This is a bit imprecise (there is the extra addition), but
4143      redundancy elimination is likely to transform the code so that
4144      it uses value of the variable before increment anyway,
4145      so it is not that much unrealistic.  */
4146   if (cst_and_fits_in_hwi (cstep))
4147     cstepi = int_cst_value (cstep);
4148   else
4149     cstepi = 0;
4150
4151   if (!constant_multiple_of (ustep, cstep, &rat))
4152     return infinite_cost;
4153
4154   if (wi::fits_shwi_p (rat))
4155     ratio = rat.to_shwi ();
4156   else
4157     return infinite_cost;
4158
4159   STRIP_NOPS (cbase);
4160   ctype = TREE_TYPE (cbase);
4161
4162   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4163
4164   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4165      or ratio == 1, it is better to handle this like
4166
4167      ubase - ratio * cbase + ratio * var
4168
4169      (also holds in the case ratio == -1, TODO.  */
4170
4171   if (cst_and_fits_in_hwi (cbase))
4172     {
4173       offset = - ratio * int_cst_value (cbase);
4174       cost = difference_cost (data,
4175                               ubase, build_int_cst (utype, 0),
4176                               &symbol_present, &var_present, &offset,
4177                               depends_on);
4178       cost.cost /= avg_loop_niter (data->current_loop);
4179     }
4180   else if (ratio == 1)
4181     {
4182       tree real_cbase = cbase;
4183
4184       /* Check to see if any adjustment is needed.  */
4185       if (cstepi == 0 && stmt_is_after_inc)
4186         {
4187           aff_tree real_cbase_aff;
4188           aff_tree cstep_aff;
4189
4190           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4191                                    &real_cbase_aff);
4192           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4193
4194           aff_combination_add (&real_cbase_aff, &cstep_aff);
4195           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4196         }
4197
4198       cost = difference_cost (data,
4199                               ubase, real_cbase,
4200                               &symbol_present, &var_present, &offset,
4201                               depends_on);
4202       cost.cost /= avg_loop_niter (data->current_loop);
4203     }
4204   else if (address_p
4205            && !POINTER_TYPE_P (ctype)
4206            && multiplier_allowed_in_address_p
4207                 (ratio, mem_mode,
4208                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4209     {
4210       cbase
4211         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4212       cost = difference_cost (data,
4213                               ubase, cbase,
4214                               &symbol_present, &var_present, &offset,
4215                               depends_on);
4216       cost.cost /= avg_loop_niter (data->current_loop);
4217     }
4218   else
4219     {
4220       cost = force_var_cost (data, cbase, depends_on);
4221       cost = add_costs (cost,
4222                         difference_cost (data,
4223                                          ubase, build_int_cst (utype, 0),
4224                                          &symbol_present, &var_present,
4225                                          &offset, depends_on));
4226       cost.cost /= avg_loop_niter (data->current_loop);
4227       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4228     }
4229
4230   if (inv_expr_id)
4231     {
4232       *inv_expr_id =
4233           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4234       /* Clear depends on.  */
4235       if (*inv_expr_id != -1 && depends_on && *depends_on)
4236         bitmap_clear (*depends_on);
4237     }
4238
4239   /* If we are after the increment, the value of the candidate is higher by
4240      one iteration.  */
4241   if (stmt_is_after_inc)
4242     offset -= ratio * cstepi;
4243
4244   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4245      (symbol/var1/const parts may be omitted).  If we are looking for an
4246      address, find the cost of addressing this.  */
4247   if (address_p)
4248     return add_costs (cost,
4249                       get_address_cost (symbol_present, var_present,
4250                                         offset, ratio, cstepi,
4251                                         mem_mode,
4252                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4253                                         speed, stmt_is_after_inc,
4254                                         can_autoinc));
4255
4256   /* Otherwise estimate the costs for computing the expression.  */
4257   if (!symbol_present && !var_present && !offset)
4258     {
4259       if (ratio != 1)
4260         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4261       return cost;
4262     }
4263
4264   /* Symbol + offset should be compile-time computable so consider that they
4265       are added once to the variable, if present.  */
4266   if (var_present && (symbol_present || offset))
4267     cost.cost += adjust_setup_cost (data,
4268                                     add_cost (speed, TYPE_MODE (ctype)));
4269
4270   /* Having offset does not affect runtime cost in case it is added to
4271      symbol, but it increases complexity.  */
4272   if (offset)
4273     cost.complexity++;
4274
4275   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4276
4277   aratio = ratio > 0 ? ratio : -ratio;
4278   if (aratio != 1)
4279     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4280   return cost;
4281
4282 fallback:
4283   if (can_autoinc)
4284     *can_autoinc = false;
4285
4286   {
4287     /* Just get the expression, expand it and measure the cost.  */
4288     tree comp = get_computation_at (data->current_loop, use, cand, at);
4289
4290     if (!comp)
4291       return infinite_cost;
4292
4293     if (address_p)
4294       comp = build_simple_mem_ref (comp);
4295
4296     return new_cost (computation_cost (comp, speed), 0);
4297   }
4298 }
4299
4300 /* Determines the cost of the computation by that USE is expressed
4301    from induction variable CAND.  If ADDRESS_P is true, we just need
4302    to create an address from it, otherwise we want to get it into
4303    register.  A set of invariants we depend on is stored in
4304    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4305    autoinc addressing is likely.  */
4306
4307 static comp_cost
4308 get_computation_cost (struct ivopts_data *data,
4309                       struct iv_use *use, struct iv_cand *cand,
4310                       bool address_p, bitmap *depends_on,
4311                       bool *can_autoinc, int *inv_expr_id)
4312 {
4313   return get_computation_cost_at (data,
4314                                   use, cand, address_p, depends_on, use->stmt,
4315                                   can_autoinc, inv_expr_id);
4316 }
4317
4318 /* Determines cost of basing replacement of USE on CAND in a generic
4319    expression.  */
4320
4321 static bool
4322 determine_use_iv_cost_generic (struct ivopts_data *data,
4323                                struct iv_use *use, struct iv_cand *cand)
4324 {
4325   bitmap depends_on;
4326   comp_cost cost;
4327   int inv_expr_id = -1;
4328
4329   /* The simple case first -- if we need to express value of the preserved
4330      original biv, the cost is 0.  This also prevents us from counting the
4331      cost of increment twice -- once at this use and once in the cost of
4332      the candidate.  */
4333   if (cand->pos == IP_ORIGINAL
4334       && cand->incremented_at == use->stmt)
4335     {
4336       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4337                        ERROR_MARK, -1);
4338       return true;
4339     }
4340
4341   cost = get_computation_cost (data, use, cand, false, &depends_on,
4342                                NULL, &inv_expr_id);
4343
4344   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4345                    inv_expr_id);
4346
4347   return !infinite_cost_p (cost);
4348 }
4349
4350 /* Determines cost of basing replacement of USE on CAND in an address.  */
4351
4352 static bool
4353 determine_use_iv_cost_address (struct ivopts_data *data,
4354                                struct iv_use *use, struct iv_cand *cand)
4355 {
4356   bitmap depends_on;
4357   bool can_autoinc;
4358   int inv_expr_id = -1;
4359   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4360                                          &can_autoinc, &inv_expr_id);
4361
4362   if (cand->ainc_use == use)
4363     {
4364       if (can_autoinc)
4365         cost.cost -= cand->cost_step;
4366       /* If we generated the candidate solely for exploiting autoincrement
4367          opportunities, and it turns out it can't be used, set the cost to
4368          infinity to make sure we ignore it.  */
4369       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4370         cost = infinite_cost;
4371     }
4372   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4373                    inv_expr_id);
4374
4375   return !infinite_cost_p (cost);
4376 }
4377
4378 /* Computes value of candidate CAND at position AT in iteration NITER, and
4379    stores it to VAL.  */
4380
4381 static void
4382 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4383                aff_tree *val)
4384 {
4385   aff_tree step, delta, nit;
4386   struct iv *iv = cand->iv;
4387   tree type = TREE_TYPE (iv->base);
4388   tree steptype = type;
4389   if (POINTER_TYPE_P (type))
4390     steptype = sizetype;
4391   steptype = unsigned_type_for (type);
4392
4393   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4394   aff_combination_convert (&step, steptype);
4395   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4396   aff_combination_convert (&nit, steptype);
4397   aff_combination_mult (&nit, &step, &delta);
4398   if (stmt_after_increment (loop, cand, at))
4399     aff_combination_add (&delta, &step);
4400
4401   tree_to_aff_combination (iv->base, type, val);
4402   if (!POINTER_TYPE_P (type))
4403     aff_combination_convert (val, steptype);
4404   aff_combination_add (val, &delta);
4405 }
4406
4407 /* Returns period of induction variable iv.  */
4408
4409 static tree
4410 iv_period (struct iv *iv)
4411 {
4412   tree step = iv->step, period, type;
4413   tree pow2div;
4414
4415   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4416
4417   type = unsigned_type_for (TREE_TYPE (step));
4418   /* Period of the iv is lcm (step, type_range)/step -1,
4419      i.e., N*type_range/step - 1. Since type range is power
4420      of two, N == (step >> num_of_ending_zeros_binary (step),
4421      so the final result is
4422
4423        (type_range >> num_of_ending_zeros_binary (step)) - 1
4424
4425   */
4426   pow2div = num_ending_zeros (step);
4427
4428   period = build_low_bits_mask (type,
4429                                 (TYPE_PRECISION (type)
4430                                  - tree_to_uhwi (pow2div)));
4431
4432   return period;
4433 }
4434
4435 /* Returns the comparison operator used when eliminating the iv USE.  */
4436
4437 static enum tree_code
4438 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4439 {
4440   struct loop *loop = data->current_loop;
4441   basic_block ex_bb;
4442   edge exit;
4443
4444   ex_bb = gimple_bb (use->stmt);
4445   exit = EDGE_SUCC (ex_bb, 0);
4446   if (flow_bb_inside_loop_p (loop, exit->dest))
4447     exit = EDGE_SUCC (ex_bb, 1);
4448
4449   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4450 }
4451
4452 static tree
4453 strip_wrap_conserving_type_conversions (tree exp)
4454 {
4455   while (tree_ssa_useless_type_conversion (exp)
4456          && (nowrap_type_p (TREE_TYPE (exp))
4457              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4458     exp = TREE_OPERAND (exp, 0);
4459   return exp;
4460 }
4461
4462 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4463    check for an exact match.  */
4464
4465 static bool
4466 expr_equal_p (tree e, tree what)
4467 {
4468   gimple stmt;
4469   enum tree_code code;
4470
4471   e = strip_wrap_conserving_type_conversions (e);
4472   what = strip_wrap_conserving_type_conversions (what);
4473
4474   code = TREE_CODE (what);
4475   if (TREE_TYPE (e) != TREE_TYPE (what))
4476     return false;
4477
4478   if (operand_equal_p (e, what, 0))
4479     return true;
4480
4481   if (TREE_CODE (e) != SSA_NAME)
4482     return false;
4483
4484   stmt = SSA_NAME_DEF_STMT (e);
4485   if (gimple_code (stmt) != GIMPLE_ASSIGN
4486       || gimple_assign_rhs_code (stmt) != code)
4487     return false;
4488
4489   switch (get_gimple_rhs_class (code))
4490     {
4491     case GIMPLE_BINARY_RHS:
4492       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4493         return false;
4494       /* Fallthru.  */
4495
4496     case GIMPLE_UNARY_RHS:
4497     case GIMPLE_SINGLE_RHS:
4498       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4499     default:
4500       return false;
4501     }
4502 }
4503
4504 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4505    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4506    calculation is performed in non-wrapping type.
4507
4508    TODO: More generally, we could test for the situation that
4509          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4510          This would require knowing the sign of OFFSET.
4511
4512          Also, we only look for the first addition in the computation of BASE.
4513          More complex analysis would be better, but introducing it just for
4514          this optimization seems like an overkill.  */
4515
4516 static bool
4517 difference_cannot_overflow_p (tree base, tree offset)
4518 {
4519   enum tree_code code;
4520   tree e1, e2;
4521
4522   if (!nowrap_type_p (TREE_TYPE (base)))
4523     return false;
4524
4525   base = expand_simple_operations (base);
4526
4527   if (TREE_CODE (base) == SSA_NAME)
4528     {
4529       gimple stmt = SSA_NAME_DEF_STMT (base);
4530
4531       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4532         return false;
4533
4534       code = gimple_assign_rhs_code (stmt);
4535       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4536         return false;
4537
4538       e1 = gimple_assign_rhs1 (stmt);
4539       e2 = gimple_assign_rhs2 (stmt);
4540     }
4541   else
4542     {
4543       code = TREE_CODE (base);
4544       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4545         return false;
4546       e1 = TREE_OPERAND (base, 0);
4547       e2 = TREE_OPERAND (base, 1);
4548     }
4549
4550   /* TODO: deeper inspection may be necessary to prove the equality.  */
4551   switch (code)
4552     {
4553     case PLUS_EXPR:
4554       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4555     case POINTER_PLUS_EXPR:
4556       return expr_equal_p (e2, offset);
4557
4558     default:
4559       return false;
4560     }
4561 }
4562
4563 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4564    comparison with CAND.  NITER describes the number of iterations of
4565    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4566
4567    We aim to handle the following situation:
4568
4569    sometype *base, *p;
4570    int a, b, i;
4571
4572    i = a;
4573    p = p_0 = base + a;
4574
4575    do
4576      {
4577        bla (*p);
4578        p++;
4579        i++;
4580      }
4581    while (i < b);
4582
4583    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4584    We aim to optimize this to
4585
4586    p = p_0 = base + a;
4587    do
4588      {
4589        bla (*p);
4590        p++;
4591      }
4592    while (p < p_0 - a + b);
4593
4594    This preserves the correctness, since the pointer arithmetics does not
4595    overflow.  More precisely:
4596
4597    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4598       overflow in computing it or the values of p.
4599    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4600       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4601
4602 static bool
4603 iv_elimination_compare_lt (struct ivopts_data *data,
4604                            struct iv_cand *cand, enum tree_code *comp_p,
4605                            struct tree_niter_desc *niter)
4606 {
4607   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4608   struct aff_tree nit, tmpa, tmpb;
4609   enum tree_code comp;
4610   HOST_WIDE_INT step;
4611
4612   /* We need to know that the candidate induction variable does not overflow.
4613      While more complex analysis may be used to prove this, for now just
4614      check that the variable appears in the original program and that it
4615      is computed in a type that guarantees no overflows.  */
4616   cand_type = TREE_TYPE (cand->iv->base);
4617   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4618     return false;
4619
4620   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4621      the calculation of the BOUND could overflow, making the comparison
4622      invalid.  */
4623   if (!data->loop_single_exit_p)
4624     return false;
4625
4626   /* We need to be able to decide whether candidate is increasing or decreasing
4627      in order to choose the right comparison operator.  */
4628   if (!cst_and_fits_in_hwi (cand->iv->step))
4629     return false;
4630   step = int_cst_value (cand->iv->step);
4631
4632   /* Check that the number of iterations matches the expected pattern:
4633      a + 1 > b ? 0 : b - a - 1.  */
4634   mbz = niter->may_be_zero;
4635   if (TREE_CODE (mbz) == GT_EXPR)
4636     {
4637       /* Handle a + 1 > b.  */
4638       tree op0 = TREE_OPERAND (mbz, 0);
4639       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4640         {
4641           a = TREE_OPERAND (op0, 0);
4642           b = TREE_OPERAND (mbz, 1);
4643         }
4644       else
4645         return false;
4646     }
4647   else if (TREE_CODE (mbz) == LT_EXPR)
4648     {
4649       tree op1 = TREE_OPERAND (mbz, 1);
4650
4651       /* Handle b < a + 1.  */
4652       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4653         {
4654           a = TREE_OPERAND (op1, 0);
4655           b = TREE_OPERAND (mbz, 0);
4656         }
4657       else
4658         return false;
4659     }
4660   else
4661     return false;
4662
4663   /* Expected number of iterations is B - A - 1.  Check that it matches
4664      the actual number, i.e., that B - A - NITER = 1.  */
4665   tree_to_aff_combination (niter->niter, nit_type, &nit);
4666   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4667   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4668   aff_combination_scale (&nit, -1);
4669   aff_combination_scale (&tmpa, -1);
4670   aff_combination_add (&tmpb, &tmpa);
4671   aff_combination_add (&tmpb, &nit);
4672   if (tmpb.n != 0 || tmpb.offset != 1)
4673     return false;
4674
4675   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4676      overflow.  */
4677   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4678                         cand->iv->step,
4679                         fold_convert (TREE_TYPE (cand->iv->step), a));
4680   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4681     return false;
4682
4683   /* Determine the new comparison operator.  */
4684   comp = step < 0 ? GT_EXPR : LT_EXPR;
4685   if (*comp_p == NE_EXPR)
4686     *comp_p = comp;
4687   else if (*comp_p == EQ_EXPR)
4688     *comp_p = invert_tree_comparison (comp, false);
4689   else
4690     gcc_unreachable ();
4691
4692   return true;
4693 }
4694
4695 /* Check whether it is possible to express the condition in USE by comparison
4696    of candidate CAND.  If so, store the value compared with to BOUND, and the
4697    comparison operator to COMP.  */
4698
4699 static bool
4700 may_eliminate_iv (struct ivopts_data *data,
4701                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4702                   enum tree_code *comp)
4703 {
4704   basic_block ex_bb;
4705   edge exit;
4706   tree period;
4707   struct loop *loop = data->current_loop;
4708   aff_tree bnd;
4709   struct tree_niter_desc *desc = NULL;
4710
4711   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4712     return false;
4713
4714   /* For now works only for exits that dominate the loop latch.
4715      TODO: extend to other conditions inside loop body.  */
4716   ex_bb = gimple_bb (use->stmt);
4717   if (use->stmt != last_stmt (ex_bb)
4718       || gimple_code (use->stmt) != GIMPLE_COND
4719       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4720     return false;
4721
4722   exit = EDGE_SUCC (ex_bb, 0);
4723   if (flow_bb_inside_loop_p (loop, exit->dest))
4724     exit = EDGE_SUCC (ex_bb, 1);
4725   if (flow_bb_inside_loop_p (loop, exit->dest))
4726     return false;
4727
4728   desc = niter_for_exit (data, exit);
4729   if (!desc)
4730     return false;
4731
4732   /* Determine whether we can use the variable to test the exit condition.
4733      This is the case iff the period of the induction variable is greater
4734      than the number of iterations for which the exit condition is true.  */
4735   period = iv_period (cand->iv);
4736
4737   /* If the number of iterations is constant, compare against it directly.  */
4738   if (TREE_CODE (desc->niter) == INTEGER_CST)
4739     {
4740       /* See cand_value_at.  */
4741       if (stmt_after_increment (loop, cand, use->stmt))
4742         {
4743           if (!tree_int_cst_lt (desc->niter, period))
4744             return false;
4745         }
4746       else
4747         {
4748           if (tree_int_cst_lt (period, desc->niter))
4749             return false;
4750         }
4751     }
4752
4753   /* If not, and if this is the only possible exit of the loop, see whether
4754      we can get a conservative estimate on the number of iterations of the
4755      entire loop and compare against that instead.  */
4756   else
4757     {
4758       widest_int period_value, max_niter;
4759
4760       max_niter = desc->max;
4761       if (stmt_after_increment (loop, cand, use->stmt))
4762         max_niter += 1;
4763       period_value = wi::to_widest (period);
4764       if (wi::gtu_p (max_niter, period_value))
4765         {
4766           /* See if we can take advantage of inferred loop bound information.  */
4767           if (data->loop_single_exit_p)
4768             {
4769               if (!max_loop_iterations (loop, &max_niter))
4770                 return false;
4771               /* The loop bound is already adjusted by adding 1.  */
4772               if (wi::gtu_p (max_niter, period_value))
4773                 return false;
4774             }
4775           else
4776             return false;
4777         }
4778     }
4779
4780   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4781
4782   *bound = fold_convert (TREE_TYPE (cand->iv->base),
4783                          aff_combination_to_tree (&bnd));
4784   *comp = iv_elimination_compare (data, use);
4785
4786   /* It is unlikely that computing the number of iterations using division
4787      would be more profitable than keeping the original induction variable.  */
4788   if (expression_expensive_p (*bound))
4789     return false;
4790
4791   /* Sometimes, it is possible to handle the situation that the number of
4792      iterations may be zero unless additional assumtions by using <
4793      instead of != in the exit condition.
4794
4795      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4796            base the exit condition on it.  However, that is often too
4797            expensive.  */
4798   if (!integer_zerop (desc->may_be_zero))
4799     return iv_elimination_compare_lt (data, cand, comp, desc);
4800
4801   return true;
4802 }
4803
4804  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4805     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4806
4807 static int
4808 parm_decl_cost (struct ivopts_data *data, tree bound)
4809 {
4810   tree sbound = bound;
4811   STRIP_NOPS (sbound);
4812
4813   if (TREE_CODE (sbound) == SSA_NAME
4814       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4815       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4816       && data->body_includes_call)
4817     return COSTS_N_INSNS (1);
4818
4819   return 0;
4820 }
4821
4822 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4823
4824 static bool
4825 determine_use_iv_cost_condition (struct ivopts_data *data,
4826                                  struct iv_use *use, struct iv_cand *cand)
4827 {
4828   tree bound = NULL_TREE;
4829   struct iv *cmp_iv;
4830   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4831   comp_cost elim_cost, express_cost, cost, bound_cost;
4832   bool ok;
4833   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4834   tree *control_var, *bound_cst;
4835   enum tree_code comp = ERROR_MARK;
4836
4837   /* Only consider real candidates.  */
4838   if (!cand->iv)
4839     {
4840       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4841                        ERROR_MARK, -1);
4842       return false;
4843     }
4844
4845   /* Try iv elimination.  */
4846   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4847     {
4848       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4849       if (elim_cost.cost == 0)
4850         elim_cost.cost = parm_decl_cost (data, bound);
4851       else if (TREE_CODE (bound) == INTEGER_CST)
4852         elim_cost.cost = 0;
4853       /* If we replace a loop condition 'i < n' with 'p < base + n',
4854          depends_on_elim will have 'base' and 'n' set, which implies
4855          that both 'base' and 'n' will be live during the loop.  More likely,
4856          'base + n' will be loop invariant, resulting in only one live value
4857          during the loop.  So in that case we clear depends_on_elim and set
4858         elim_inv_expr_id instead.  */
4859       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4860         {
4861           elim_inv_expr_id = get_expr_id (data, bound);
4862           bitmap_clear (depends_on_elim);
4863         }
4864       /* The bound is a loop invariant, so it will be only computed
4865          once.  */
4866       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4867     }
4868   else
4869     elim_cost = infinite_cost;
4870
4871   /* Try expressing the original giv.  If it is compared with an invariant,
4872      note that we cannot get rid of it.  */
4873   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4874                               NULL, &cmp_iv);
4875   gcc_assert (ok);
4876
4877   /* When the condition is a comparison of the candidate IV against
4878      zero, prefer this IV.
4879
4880      TODO: The constant that we're subtracting from the cost should
4881      be target-dependent.  This information should be added to the
4882      target costs for each backend.  */
4883   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4884       && integer_zerop (*bound_cst)
4885       && (operand_equal_p (*control_var, cand->var_after, 0)
4886           || operand_equal_p (*control_var, cand->var_before, 0)))
4887     elim_cost.cost -= 1;
4888
4889   express_cost = get_computation_cost (data, use, cand, false,
4890                                        &depends_on_express, NULL,
4891                                        &express_inv_expr_id);
4892   fd_ivopts_data = data;
4893   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4894
4895   /* Count the cost of the original bound as well.  */
4896   bound_cost = force_var_cost (data, *bound_cst, NULL);
4897   if (bound_cost.cost == 0)
4898     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4899   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4900     bound_cost.cost = 0;
4901   express_cost.cost += bound_cost.cost;
4902
4903   /* Choose the better approach, preferring the eliminated IV. */
4904   if (compare_costs (elim_cost, express_cost) <= 0)
4905     {
4906       cost = elim_cost;
4907       depends_on = depends_on_elim;
4908       depends_on_elim = NULL;
4909       inv_expr_id = elim_inv_expr_id;
4910     }
4911   else
4912     {
4913       cost = express_cost;
4914       depends_on = depends_on_express;
4915       depends_on_express = NULL;
4916       bound = NULL_TREE;
4917       comp = ERROR_MARK;
4918       inv_expr_id = express_inv_expr_id;
4919     }
4920
4921   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4922
4923   if (depends_on_elim)
4924     BITMAP_FREE (depends_on_elim);
4925   if (depends_on_express)
4926     BITMAP_FREE (depends_on_express);
4927
4928   return !infinite_cost_p (cost);
4929 }
4930
4931 /* Determines cost of basing replacement of USE on CAND.  Returns false
4932    if USE cannot be based on CAND.  */
4933
4934 static bool
4935 determine_use_iv_cost (struct ivopts_data *data,
4936                        struct iv_use *use, struct iv_cand *cand)
4937 {
4938   switch (use->type)
4939     {
4940     case USE_NONLINEAR_EXPR:
4941       return determine_use_iv_cost_generic (data, use, cand);
4942
4943     case USE_ADDRESS:
4944       return determine_use_iv_cost_address (data, use, cand);
4945
4946     case USE_COMPARE:
4947       return determine_use_iv_cost_condition (data, use, cand);
4948
4949     default:
4950       gcc_unreachable ();
4951     }
4952 }
4953
4954 /* Return true if get_computation_cost indicates that autoincrement is
4955    a possibility for the pair of USE and CAND, false otherwise.  */
4956
4957 static bool
4958 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4959                            struct iv_cand *cand)
4960 {
4961   bitmap depends_on;
4962   bool can_autoinc;
4963   comp_cost cost;
4964
4965   if (use->type != USE_ADDRESS)
4966     return false;
4967
4968   cost = get_computation_cost (data, use, cand, true, &depends_on,
4969                                &can_autoinc, NULL);
4970
4971   BITMAP_FREE (depends_on);
4972
4973   return !infinite_cost_p (cost) && can_autoinc;
4974 }
4975
4976 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4977    use that allows autoincrement, and set their AINC_USE if possible.  */
4978
4979 static void
4980 set_autoinc_for_original_candidates (struct ivopts_data *data)
4981 {
4982   unsigned i, j;
4983
4984   for (i = 0; i < n_iv_cands (data); i++)
4985     {
4986       struct iv_cand *cand = iv_cand (data, i);
4987       struct iv_use *closest_before = NULL;
4988       struct iv_use *closest_after = NULL;
4989       if (cand->pos != IP_ORIGINAL)
4990         continue;
4991
4992       for (j = 0; j < n_iv_uses (data); j++)
4993         {
4994           struct iv_use *use = iv_use (data, j);
4995           unsigned uid = gimple_uid (use->stmt);
4996
4997           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4998             continue;
4999
5000           if (uid < gimple_uid (cand->incremented_at)
5001               && (closest_before == NULL
5002                   || uid > gimple_uid (closest_before->stmt)))
5003             closest_before = use;
5004
5005           if (uid > gimple_uid (cand->incremented_at)
5006               && (closest_after == NULL
5007                   || uid < gimple_uid (closest_after->stmt)))
5008             closest_after = use;
5009         }
5010
5011       if (closest_before != NULL
5012           && autoinc_possible_for_pair (data, closest_before, cand))
5013         cand->ainc_use = closest_before;
5014       else if (closest_after != NULL
5015                && autoinc_possible_for_pair (data, closest_after, cand))
5016         cand->ainc_use = closest_after;
5017     }
5018 }
5019
5020 /* Finds the candidates for the induction variables.  */
5021
5022 static void
5023 find_iv_candidates (struct ivopts_data *data)
5024 {
5025   /* Add commonly used ivs.  */
5026   add_standard_iv_candidates (data);
5027
5028   /* Add old induction variables.  */
5029   add_old_ivs_candidates (data);
5030
5031   /* Add induction variables derived from uses.  */
5032   add_derived_ivs_candidates (data);
5033
5034   set_autoinc_for_original_candidates (data);
5035
5036   /* Record the important candidates.  */
5037   record_important_candidates (data);
5038 }
5039
5040 /* Determines costs of basing the use of the iv on an iv candidate.  */
5041
5042 static void
5043 determine_use_iv_costs (struct ivopts_data *data)
5044 {
5045   unsigned i, j;
5046   struct iv_use *use;
5047   struct iv_cand *cand;
5048   bitmap to_clear = BITMAP_ALLOC (NULL);
5049
5050   alloc_use_cost_map (data);
5051
5052   for (i = 0; i < n_iv_uses (data); i++)
5053     {
5054       use = iv_use (data, i);
5055
5056       if (data->consider_all_candidates)
5057         {
5058           for (j = 0; j < n_iv_cands (data); j++)
5059             {
5060               cand = iv_cand (data, j);
5061               determine_use_iv_cost (data, use, cand);
5062             }
5063         }
5064       else
5065         {
5066           bitmap_iterator bi;
5067
5068           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5069             {
5070               cand = iv_cand (data, j);
5071               if (!determine_use_iv_cost (data, use, cand))
5072                 bitmap_set_bit (to_clear, j);
5073             }
5074
5075           /* Remove the candidates for that the cost is infinite from
5076              the list of related candidates.  */
5077           bitmap_and_compl_into (use->related_cands, to_clear);
5078           bitmap_clear (to_clear);
5079         }
5080     }
5081
5082   BITMAP_FREE (to_clear);
5083
5084   if (dump_file && (dump_flags & TDF_DETAILS))
5085     {
5086       fprintf (dump_file, "Use-candidate costs:\n");
5087
5088       for (i = 0; i < n_iv_uses (data); i++)
5089         {
5090           use = iv_use (data, i);
5091
5092           fprintf (dump_file, "Use %d:\n", i);
5093           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5094           for (j = 0; j < use->n_map_members; j++)
5095             {
5096               if (!use->cost_map[j].cand
5097                   || infinite_cost_p (use->cost_map[j].cost))
5098                 continue;
5099
5100               fprintf (dump_file, "  %d\t%d\t%d\t",
5101                        use->cost_map[j].cand->id,
5102                        use->cost_map[j].cost.cost,
5103                        use->cost_map[j].cost.complexity);
5104               if (use->cost_map[j].depends_on)
5105                 bitmap_print (dump_file,
5106                               use->cost_map[j].depends_on, "","");
5107               if (use->cost_map[j].inv_expr_id != -1)
5108                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5109               fprintf (dump_file, "\n");
5110             }
5111
5112           fprintf (dump_file, "\n");
5113         }
5114       fprintf (dump_file, "\n");
5115     }
5116 }
5117
5118 /* Determines cost of the candidate CAND.  */
5119
5120 static void
5121 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5122 {
5123   comp_cost cost_base;
5124   unsigned cost, cost_step;
5125   tree base;
5126
5127   if (!cand->iv)
5128     {
5129       cand->cost = 0;
5130       return;
5131     }
5132
5133   /* There are two costs associated with the candidate -- its increment
5134      and its initialization.  The second is almost negligible for any loop
5135      that rolls enough, so we take it just very little into account.  */
5136
5137   base = cand->iv->base;
5138   cost_base = force_var_cost (data, base, NULL);
5139   /* It will be exceptional that the iv register happens to be initialized with
5140      the proper value at no cost.  In general, there will at least be a regcopy
5141      or a const set.  */
5142   if (cost_base.cost == 0)
5143     cost_base.cost = COSTS_N_INSNS (1);
5144   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5145
5146   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5147
5148   /* Prefer the original ivs unless we may gain something by replacing it.
5149      The reason is to make debugging simpler; so this is not relevant for
5150      artificial ivs created by other optimization passes.  */
5151   if (cand->pos != IP_ORIGINAL
5152       || !SSA_NAME_VAR (cand->var_before)
5153       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5154     cost++;
5155
5156   /* Prefer not to insert statements into latch unless there are some
5157      already (so that we do not create unnecessary jumps).  */
5158   if (cand->pos == IP_END
5159       && empty_block_p (ip_end_pos (data->current_loop)))
5160     cost++;
5161
5162   cand->cost = cost;
5163   cand->cost_step = cost_step;
5164 }
5165
5166 /* Determines costs of computation of the candidates.  */
5167
5168 static void
5169 determine_iv_costs (struct ivopts_data *data)
5170 {
5171   unsigned i;
5172
5173   if (dump_file && (dump_flags & TDF_DETAILS))
5174     {
5175       fprintf (dump_file, "Candidate costs:\n");
5176       fprintf (dump_file, "  cand\tcost\n");
5177     }
5178
5179   for (i = 0; i < n_iv_cands (data); i++)
5180     {
5181       struct iv_cand *cand = iv_cand (data, i);
5182
5183       determine_iv_cost (data, cand);
5184
5185       if (dump_file && (dump_flags & TDF_DETAILS))
5186         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5187     }
5188
5189   if (dump_file && (dump_flags & TDF_DETAILS))
5190     fprintf (dump_file, "\n");
5191 }
5192
5193 /* Calculates cost for having SIZE induction variables.  */
5194
5195 static unsigned
5196 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5197 {
5198   /* We add size to the cost, so that we prefer eliminating ivs
5199      if possible.  */
5200   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5201                                             data->body_includes_call);
5202 }
5203
5204 /* For each size of the induction variable set determine the penalty.  */
5205
5206 static void
5207 determine_set_costs (struct ivopts_data *data)
5208 {
5209   unsigned j, n;
5210   gimple phi;
5211   gimple_stmt_iterator psi;
5212   tree op;
5213   struct loop *loop = data->current_loop;
5214   bitmap_iterator bi;
5215
5216   if (dump_file && (dump_flags & TDF_DETAILS))
5217     {
5218       fprintf (dump_file, "Global costs:\n");
5219       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5220       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5221       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5222       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5223     }
5224
5225   n = 0;
5226   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5227     {
5228       phi = gsi_stmt (psi);
5229       op = PHI_RESULT (phi);
5230
5231       if (virtual_operand_p (op))
5232         continue;
5233
5234       if (get_iv (data, op))
5235         continue;
5236
5237       n++;
5238     }
5239
5240   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5241     {
5242       struct version_info *info = ver_info (data, j);
5243
5244       if (info->inv_id && info->has_nonlin_use)
5245         n++;
5246     }
5247
5248   data->regs_used = n;
5249   if (dump_file && (dump_flags & TDF_DETAILS))
5250     fprintf (dump_file, "  regs_used %d\n", n);
5251
5252   if (dump_file && (dump_flags & TDF_DETAILS))
5253     {
5254       fprintf (dump_file, "  cost for size:\n");
5255       fprintf (dump_file, "  ivs\tcost\n");
5256       for (j = 0; j <= 2 * target_avail_regs; j++)
5257         fprintf (dump_file, "  %d\t%d\n", j,
5258                  ivopts_global_cost_for_size (data, j));
5259       fprintf (dump_file, "\n");
5260     }
5261 }
5262
5263 /* Returns true if A is a cheaper cost pair than B.  */
5264
5265 static bool
5266 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5267 {
5268   int cmp;
5269
5270   if (!a)
5271     return false;
5272
5273   if (!b)
5274     return true;
5275
5276   cmp = compare_costs (a->cost, b->cost);
5277   if (cmp < 0)
5278     return true;
5279
5280   if (cmp > 0)
5281     return false;
5282
5283   /* In case the costs are the same, prefer the cheaper candidate.  */
5284   if (a->cand->cost < b->cand->cost)
5285     return true;
5286
5287   return false;
5288 }
5289
5290
5291 /* Returns candidate by that USE is expressed in IVS.  */
5292
5293 static struct cost_pair *
5294 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5295 {
5296   return ivs->cand_for_use[use->id];
5297 }
5298
5299 /* Computes the cost field of IVS structure.  */
5300
5301 static void
5302 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5303 {
5304   comp_cost cost = ivs->cand_use_cost;
5305
5306   cost.cost += ivs->cand_cost;
5307
5308   cost.cost += ivopts_global_cost_for_size (data,
5309                                             ivs->n_regs + ivs->num_used_inv_expr);
5310
5311   ivs->cost = cost;
5312 }
5313
5314 /* Remove invariants in set INVS to set IVS.  */
5315
5316 static void
5317 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5318 {
5319   bitmap_iterator bi;
5320   unsigned iid;
5321
5322   if (!invs)
5323     return;
5324
5325   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5326     {
5327       ivs->n_invariant_uses[iid]--;
5328       if (ivs->n_invariant_uses[iid] == 0)
5329         ivs->n_regs--;
5330     }
5331 }
5332
5333 /* Set USE not to be expressed by any candidate in IVS.  */
5334
5335 static void
5336 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5337                  struct iv_use *use)
5338 {
5339   unsigned uid = use->id, cid;
5340   struct cost_pair *cp;
5341
5342   cp = ivs->cand_for_use[uid];
5343   if (!cp)
5344     return;
5345   cid = cp->cand->id;
5346
5347   ivs->bad_uses++;
5348   ivs->cand_for_use[uid] = NULL;
5349   ivs->n_cand_uses[cid]--;
5350
5351   if (ivs->n_cand_uses[cid] == 0)
5352     {
5353       bitmap_clear_bit (ivs->cands, cid);
5354       /* Do not count the pseudocandidates.  */
5355       if (cp->cand->iv)
5356         ivs->n_regs--;
5357       ivs->n_cands--;
5358       ivs->cand_cost -= cp->cand->cost;
5359
5360       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5361     }
5362
5363   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5364
5365   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5366
5367   if (cp->inv_expr_id != -1)
5368     {
5369       ivs->used_inv_expr[cp->inv_expr_id]--;
5370       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5371         ivs->num_used_inv_expr--;
5372     }
5373   iv_ca_recount_cost (data, ivs);
5374 }
5375
5376 /* Add invariants in set INVS to set IVS.  */
5377
5378 static void
5379 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5380 {
5381   bitmap_iterator bi;
5382   unsigned iid;
5383
5384   if (!invs)
5385     return;
5386
5387   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5388     {
5389       ivs->n_invariant_uses[iid]++;
5390       if (ivs->n_invariant_uses[iid] == 1)
5391         ivs->n_regs++;
5392     }
5393 }
5394
5395 /* Set cost pair for USE in set IVS to CP.  */
5396
5397 static void
5398 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5399               struct iv_use *use, struct cost_pair *cp)
5400 {
5401   unsigned uid = use->id, cid;
5402
5403   if (ivs->cand_for_use[uid] == cp)
5404     return;
5405
5406   if (ivs->cand_for_use[uid])
5407     iv_ca_set_no_cp (data, ivs, use);
5408
5409   if (cp)
5410     {
5411       cid = cp->cand->id;
5412
5413       ivs->bad_uses--;
5414       ivs->cand_for_use[uid] = cp;
5415       ivs->n_cand_uses[cid]++;
5416       if (ivs->n_cand_uses[cid] == 1)
5417         {
5418           bitmap_set_bit (ivs->cands, cid);
5419           /* Do not count the pseudocandidates.  */
5420           if (cp->cand->iv)
5421             ivs->n_regs++;
5422           ivs->n_cands++;
5423           ivs->cand_cost += cp->cand->cost;
5424
5425           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5426         }
5427
5428       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5429       iv_ca_set_add_invariants (ivs, cp->depends_on);
5430
5431       if (cp->inv_expr_id != -1)
5432         {
5433           ivs->used_inv_expr[cp->inv_expr_id]++;
5434           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5435             ivs->num_used_inv_expr++;
5436         }
5437       iv_ca_recount_cost (data, ivs);
5438     }
5439 }
5440
5441 /* Extend set IVS by expressing USE by some of the candidates in it
5442    if possible. All important candidates will be considered
5443    if IMPORTANT_CANDIDATES is true.  */
5444
5445 static void
5446 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5447                struct iv_use *use, bool important_candidates)
5448 {
5449   struct cost_pair *best_cp = NULL, *cp;
5450   bitmap_iterator bi;
5451   bitmap cands;
5452   unsigned i;
5453
5454   gcc_assert (ivs->upto >= use->id);
5455
5456   if (ivs->upto == use->id)
5457     {
5458       ivs->upto++;
5459       ivs->bad_uses++;
5460     }
5461
5462   cands = (important_candidates ? data->important_candidates : ivs->cands);
5463   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5464     {
5465       struct iv_cand *cand = iv_cand (data, i);
5466
5467       cp = get_use_iv_cost (data, use, cand);
5468
5469       if (cheaper_cost_pair (cp, best_cp))
5470         best_cp = cp;
5471     }
5472
5473   iv_ca_set_cp (data, ivs, use, best_cp);
5474 }
5475
5476 /* Get cost for assignment IVS.  */
5477
5478 static comp_cost
5479 iv_ca_cost (struct iv_ca *ivs)
5480 {
5481   /* This was a conditional expression but it triggered a bug in
5482      Sun C 5.5.  */
5483   if (ivs->bad_uses)
5484     return infinite_cost;
5485   else
5486     return ivs->cost;
5487 }
5488
5489 /* Returns true if all dependences of CP are among invariants in IVS.  */
5490
5491 static bool
5492 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5493 {
5494   unsigned i;
5495   bitmap_iterator bi;
5496
5497   if (!cp->depends_on)
5498     return true;
5499
5500   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5501     {
5502       if (ivs->n_invariant_uses[i] == 0)
5503         return false;
5504     }
5505
5506   return true;
5507 }
5508
5509 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5510    it before NEXT_CHANGE.  */
5511
5512 static struct iv_ca_delta *
5513 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5514                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5515 {
5516   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5517
5518   change->use = use;
5519   change->old_cp = old_cp;
5520   change->new_cp = new_cp;
5521   change->next_change = next_change;
5522
5523   return change;
5524 }
5525
5526 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5527    are rewritten.  */
5528
5529 static struct iv_ca_delta *
5530 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5531 {
5532   struct iv_ca_delta *last;
5533
5534   if (!l2)
5535     return l1;
5536
5537   if (!l1)
5538     return l2;
5539
5540   for (last = l1; last->next_change; last = last->next_change)
5541     continue;
5542   last->next_change = l2;
5543
5544   return l1;
5545 }
5546
5547 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5548
5549 static struct iv_ca_delta *
5550 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5551 {
5552   struct iv_ca_delta *act, *next, *prev = NULL;
5553   struct cost_pair *tmp;
5554
5555   for (act = delta; act; act = next)
5556     {
5557       next = act->next_change;
5558       act->next_change = prev;
5559       prev = act;
5560
5561       tmp = act->old_cp;
5562       act->old_cp = act->new_cp;
5563       act->new_cp = tmp;
5564     }
5565
5566   return prev;
5567 }
5568
5569 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5570    reverted instead.  */
5571
5572 static void
5573 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5574                     struct iv_ca_delta *delta, bool forward)
5575 {
5576   struct cost_pair *from, *to;
5577   struct iv_ca_delta *act;
5578
5579   if (!forward)
5580     delta = iv_ca_delta_reverse (delta);
5581
5582   for (act = delta; act; act = act->next_change)
5583     {
5584       from = act->old_cp;
5585       to = act->new_cp;
5586       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5587       iv_ca_set_cp (data, ivs, act->use, to);
5588     }
5589
5590   if (!forward)
5591     iv_ca_delta_reverse (delta);
5592 }
5593
5594 /* Returns true if CAND is used in IVS.  */
5595
5596 static bool
5597 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5598 {
5599   return ivs->n_cand_uses[cand->id] > 0;
5600 }
5601
5602 /* Returns number of induction variable candidates in the set IVS.  */
5603
5604 static unsigned
5605 iv_ca_n_cands (struct iv_ca *ivs)
5606 {
5607   return ivs->n_cands;
5608 }
5609
5610 /* Free the list of changes DELTA.  */
5611
5612 static void
5613 iv_ca_delta_free (struct iv_ca_delta **delta)
5614 {
5615   struct iv_ca_delta *act, *next;
5616
5617   for (act = *delta; act; act = next)
5618     {
5619       next = act->next_change;
5620       free (act);
5621     }
5622
5623   *delta = NULL;
5624 }
5625
5626 /* Allocates new iv candidates assignment.  */
5627
5628 static struct iv_ca *
5629 iv_ca_new (struct ivopts_data *data)
5630 {
5631   struct iv_ca *nw = XNEW (struct iv_ca);
5632
5633   nw->upto = 0;
5634   nw->bad_uses = 0;
5635   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5636   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5637   nw->cands = BITMAP_ALLOC (NULL);
5638   nw->n_cands = 0;
5639   nw->n_regs = 0;
5640   nw->cand_use_cost = no_cost;
5641   nw->cand_cost = 0;
5642   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5643   nw->cost = no_cost;
5644   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5645   nw->num_used_inv_expr = 0;
5646
5647   return nw;
5648 }
5649
5650 /* Free memory occupied by the set IVS.  */
5651
5652 static void
5653 iv_ca_free (struct iv_ca **ivs)
5654 {
5655   free ((*ivs)->cand_for_use);
5656   free ((*ivs)->n_cand_uses);
5657   BITMAP_FREE ((*ivs)->cands);
5658   free ((*ivs)->n_invariant_uses);
5659   free ((*ivs)->used_inv_expr);
5660   free (*ivs);
5661   *ivs = NULL;
5662 }
5663
5664 /* Dumps IVS to FILE.  */
5665
5666 static void
5667 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5668 {
5669   const char *pref = "  invariants ";
5670   unsigned i;
5671   comp_cost cost = iv_ca_cost (ivs);
5672
5673   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5674   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5675            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5676   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5677
5678    for (i = 0; i < ivs->upto; i++)
5679     {
5680       struct iv_use *use = iv_use (data, i);
5681       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5682       if (cp)
5683         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5684                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5685       else
5686         fprintf (file, "   use:%d --> ??\n", use->id);
5687     }
5688
5689   for (i = 1; i <= data->max_inv_id; i++)
5690     if (ivs->n_invariant_uses[i])
5691       {
5692         fprintf (file, "%s%d", pref, i);
5693         pref = ", ";
5694       }
5695   fprintf (file, "\n\n");
5696 }
5697
5698 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5699    new set, and store differences in DELTA.  Number of induction variables
5700    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5701    the function will try to find a solution with mimimal iv candidates.  */
5702
5703 static comp_cost
5704 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5705               struct iv_cand *cand, struct iv_ca_delta **delta,
5706               unsigned *n_ivs, bool min_ncand)
5707 {
5708   unsigned i;
5709   comp_cost cost;
5710   struct iv_use *use;
5711   struct cost_pair *old_cp, *new_cp;
5712
5713   *delta = NULL;
5714   for (i = 0; i < ivs->upto; i++)
5715     {
5716       use = iv_use (data, i);
5717       old_cp = iv_ca_cand_for_use (ivs, use);
5718
5719       if (old_cp
5720           && old_cp->cand == cand)
5721         continue;
5722
5723       new_cp = get_use_iv_cost (data, use, cand);
5724       if (!new_cp)
5725         continue;
5726
5727       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5728         continue;
5729
5730       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5731         continue;
5732
5733       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5734     }
5735
5736   iv_ca_delta_commit (data, ivs, *delta, true);
5737   cost = iv_ca_cost (ivs);
5738   if (n_ivs)
5739     *n_ivs = iv_ca_n_cands (ivs);
5740   iv_ca_delta_commit (data, ivs, *delta, false);
5741
5742   return cost;
5743 }
5744
5745 /* Try narrowing set IVS by removing CAND.  Return the cost of
5746    the new set and store the differences in DELTA.  START is
5747    the candidate with which we start narrowing.  */
5748
5749 static comp_cost
5750 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5751               struct iv_cand *cand, struct iv_cand *start,
5752               struct iv_ca_delta **delta)
5753 {
5754   unsigned i, ci;
5755   struct iv_use *use;
5756   struct cost_pair *old_cp, *new_cp, *cp;
5757   bitmap_iterator bi;
5758   struct iv_cand *cnd;
5759   comp_cost cost, best_cost, acost;
5760
5761   *delta = NULL;
5762   for (i = 0; i < n_iv_uses (data); i++)
5763     {
5764       use = iv_use (data, i);
5765
5766       old_cp = iv_ca_cand_for_use (ivs, use);
5767       if (old_cp->cand != cand)
5768         continue;
5769
5770       best_cost = iv_ca_cost (ivs);
5771       /* Start narrowing with START.  */
5772       new_cp = get_use_iv_cost (data, use, start);
5773
5774       if (data->consider_all_candidates)
5775         {
5776           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5777             {
5778               if (ci == cand->id || (start && ci == start->id))
5779                 continue;
5780
5781               cnd = iv_cand (data, ci);
5782
5783               cp = get_use_iv_cost (data, use, cnd);
5784               if (!cp)
5785                 continue;
5786
5787               iv_ca_set_cp (data, ivs, use, cp);
5788               acost = iv_ca_cost (ivs);
5789
5790               if (compare_costs (acost, best_cost) < 0)
5791                 {
5792                   best_cost = acost;
5793                   new_cp = cp;
5794                 }
5795             }
5796         }
5797       else
5798         {
5799           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5800             {
5801               if (ci == cand->id || (start && ci == start->id))
5802                 continue;
5803
5804               cnd = iv_cand (data, ci);
5805
5806               cp = get_use_iv_cost (data, use, cnd);
5807               if (!cp)
5808                 continue;
5809
5810               iv_ca_set_cp (data, ivs, use, cp);
5811               acost = iv_ca_cost (ivs);
5812
5813               if (compare_costs (acost, best_cost) < 0)
5814                 {
5815                   best_cost = acost;
5816                   new_cp = cp;
5817                 }
5818             }
5819         }
5820       /* Restore to old cp for use.  */
5821       iv_ca_set_cp (data, ivs, use, old_cp);
5822
5823       if (!new_cp)
5824         {
5825           iv_ca_delta_free (delta);
5826           return infinite_cost;
5827         }
5828
5829       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5830     }
5831
5832   iv_ca_delta_commit (data, ivs, *delta, true);
5833   cost = iv_ca_cost (ivs);
5834   iv_ca_delta_commit (data, ivs, *delta, false);
5835
5836   return cost;
5837 }
5838
5839 /* Try optimizing the set of candidates IVS by removing candidates different
5840    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5841    differences in DELTA.  */
5842
5843 static comp_cost
5844 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5845              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5846 {
5847   bitmap_iterator bi;
5848   struct iv_ca_delta *act_delta, *best_delta;
5849   unsigned i;
5850   comp_cost best_cost, acost;
5851   struct iv_cand *cand;
5852
5853   best_delta = NULL;
5854   best_cost = iv_ca_cost (ivs);
5855
5856   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5857     {
5858       cand = iv_cand (data, i);
5859
5860       if (cand == except_cand)
5861         continue;
5862
5863       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5864
5865       if (compare_costs (acost, best_cost) < 0)
5866         {
5867           best_cost = acost;
5868           iv_ca_delta_free (&best_delta);
5869           best_delta = act_delta;
5870         }
5871       else
5872         iv_ca_delta_free (&act_delta);
5873     }
5874
5875   if (!best_delta)
5876     {
5877       *delta = NULL;
5878       return best_cost;
5879     }
5880
5881   /* Recurse to possibly remove other unnecessary ivs.  */
5882   iv_ca_delta_commit (data, ivs, best_delta, true);
5883   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5884   iv_ca_delta_commit (data, ivs, best_delta, false);
5885   *delta = iv_ca_delta_join (best_delta, *delta);
5886   return best_cost;
5887 }
5888
5889 /* Tries to extend the sets IVS in the best possible way in order
5890    to express the USE.  If ORIGINALP is true, prefer candidates from
5891    the original set of IVs, otherwise favor important candidates not
5892    based on any memory object.  */
5893
5894 static bool
5895 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5896                   struct iv_use *use, bool originalp)
5897 {
5898   comp_cost best_cost, act_cost;
5899   unsigned i;
5900   bitmap_iterator bi;
5901   struct iv_cand *cand;
5902   struct iv_ca_delta *best_delta = NULL, *act_delta;
5903   struct cost_pair *cp;
5904
5905   iv_ca_add_use (data, ivs, use, false);
5906   best_cost = iv_ca_cost (ivs);
5907
5908   cp = iv_ca_cand_for_use (ivs, use);
5909   if (!cp)
5910     {
5911       ivs->upto--;
5912       ivs->bad_uses--;
5913       iv_ca_add_use (data, ivs, use, true);
5914       best_cost = iv_ca_cost (ivs);
5915       cp = iv_ca_cand_for_use (ivs, use);
5916     }
5917   if (cp)
5918     {
5919       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5920       iv_ca_set_no_cp (data, ivs, use);
5921     }
5922
5923   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5924      first try important candidates not based on any memory object.  Only if
5925      this fails, try the specific ones.  Rationale -- in loops with many
5926      variables the best choice often is to use just one generic biv.  If we
5927      added here many ivs specific to the uses, the optimization algorithm later
5928      would be likely to get stuck in a local minimum, thus causing us to create
5929      too many ivs.  The approach from few ivs to more seems more likely to be
5930      successful -- starting from few ivs, replacing an expensive use by a
5931      specific iv should always be a win.  */
5932   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5933     {
5934       cand = iv_cand (data, i);
5935
5936       if (originalp && cand->pos !=IP_ORIGINAL)
5937         continue;
5938
5939       if (!originalp && cand->iv->base_object != NULL_TREE)
5940         continue;
5941
5942       if (iv_ca_cand_used_p (ivs, cand))
5943         continue;
5944
5945       cp = get_use_iv_cost (data, use, cand);
5946       if (!cp)
5947         continue;
5948
5949       iv_ca_set_cp (data, ivs, use, cp);
5950       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5951                                true);
5952       iv_ca_set_no_cp (data, ivs, use);
5953       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5954
5955       if (compare_costs (act_cost, best_cost) < 0)
5956         {
5957           best_cost = act_cost;
5958
5959           iv_ca_delta_free (&best_delta);
5960           best_delta = act_delta;
5961         }
5962       else
5963         iv_ca_delta_free (&act_delta);
5964     }
5965
5966   if (infinite_cost_p (best_cost))
5967     {
5968       for (i = 0; i < use->n_map_members; i++)
5969         {
5970           cp = use->cost_map + i;
5971           cand = cp->cand;
5972           if (!cand)
5973             continue;
5974
5975           /* Already tried this.  */
5976           if (cand->important)
5977             {
5978               if (originalp && cand->pos == IP_ORIGINAL)
5979                 continue;
5980               if (!originalp && cand->iv->base_object == NULL_TREE)
5981                 continue;
5982             }
5983
5984           if (iv_ca_cand_used_p (ivs, cand))
5985             continue;
5986
5987           act_delta = NULL;
5988           iv_ca_set_cp (data, ivs, use, cp);
5989           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5990           iv_ca_set_no_cp (data, ivs, use);
5991           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5992                                        cp, act_delta);
5993
5994           if (compare_costs (act_cost, best_cost) < 0)
5995             {
5996               best_cost = act_cost;
5997
5998               if (best_delta)
5999                 iv_ca_delta_free (&best_delta);
6000               best_delta = act_delta;
6001             }
6002           else
6003             iv_ca_delta_free (&act_delta);
6004         }
6005     }
6006
6007   iv_ca_delta_commit (data, ivs, best_delta, true);
6008   iv_ca_delta_free (&best_delta);
6009
6010   return !infinite_cost_p (best_cost);
6011 }
6012
6013 /* Finds an initial assignment of candidates to uses.  */
6014
6015 static struct iv_ca *
6016 get_initial_solution (struct ivopts_data *data, bool originalp)
6017 {
6018   struct iv_ca *ivs = iv_ca_new (data);
6019   unsigned i;
6020
6021   for (i = 0; i < n_iv_uses (data); i++)
6022     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6023       {
6024         iv_ca_free (&ivs);
6025         return NULL;
6026       }
6027
6028   return ivs;
6029 }
6030
6031 /* Tries to improve set of induction variables IVS.  */
6032
6033 static bool
6034 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6035 {
6036   unsigned i, n_ivs;
6037   comp_cost acost, best_cost = iv_ca_cost (ivs);
6038   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6039   struct iv_cand *cand;
6040
6041   /* Try extending the set of induction variables by one.  */
6042   for (i = 0; i < n_iv_cands (data); i++)
6043     {
6044       cand = iv_cand (data, i);
6045
6046       if (iv_ca_cand_used_p (ivs, cand))
6047         continue;
6048
6049       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6050       if (!act_delta)
6051         continue;
6052
6053       /* If we successfully added the candidate and the set is small enough,
6054          try optimizing it by removing other candidates.  */
6055       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6056         {
6057           iv_ca_delta_commit (data, ivs, act_delta, true);
6058           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6059           iv_ca_delta_commit (data, ivs, act_delta, false);
6060           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6061         }
6062
6063       if (compare_costs (acost, best_cost) < 0)
6064         {
6065           best_cost = acost;
6066           iv_ca_delta_free (&best_delta);
6067           best_delta = act_delta;
6068         }
6069       else
6070         iv_ca_delta_free (&act_delta);
6071     }
6072
6073   if (!best_delta)
6074     {
6075       /* Try removing the candidates from the set instead.  */
6076       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6077
6078       /* Nothing more we can do.  */
6079       if (!best_delta)
6080         return false;
6081     }
6082
6083   iv_ca_delta_commit (data, ivs, best_delta, true);
6084   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6085   iv_ca_delta_free (&best_delta);
6086   return true;
6087 }
6088
6089 /* Attempts to find the optimal set of induction variables.  We do simple
6090    greedy heuristic -- we try to replace at most one candidate in the selected
6091    solution and remove the unused ivs while this improves the cost.  */
6092
6093 static struct iv_ca *
6094 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6095 {
6096   struct iv_ca *set;
6097
6098   /* Get the initial solution.  */
6099   set = get_initial_solution (data, originalp);
6100   if (!set)
6101     {
6102       if (dump_file && (dump_flags & TDF_DETAILS))
6103         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6104       return NULL;
6105     }
6106
6107   if (dump_file && (dump_flags & TDF_DETAILS))
6108     {
6109       fprintf (dump_file, "Initial set of candidates:\n");
6110       iv_ca_dump (data, dump_file, set);
6111     }
6112
6113   while (try_improve_iv_set (data, set))
6114     {
6115       if (dump_file && (dump_flags & TDF_DETAILS))
6116         {
6117           fprintf (dump_file, "Improved to:\n");
6118           iv_ca_dump (data, dump_file, set);
6119         }
6120     }
6121
6122   return set;
6123 }
6124
6125 static struct iv_ca *
6126 find_optimal_iv_set (struct ivopts_data *data)
6127 {
6128   unsigned i;
6129   struct iv_ca *set, *origset;
6130   struct iv_use *use;
6131   comp_cost cost, origcost;
6132
6133   /* Determine the cost based on a strategy that starts with original IVs,
6134      and try again using a strategy that prefers candidates not based
6135      on any IVs.  */
6136   origset = find_optimal_iv_set_1 (data, true);
6137   set = find_optimal_iv_set_1 (data, false);
6138
6139   if (!origset && !set)
6140     return NULL;
6141
6142   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6143   cost = set ? iv_ca_cost (set) : infinite_cost;
6144
6145   if (dump_file && (dump_flags & TDF_DETAILS))
6146     {
6147       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6148                origcost.cost, origcost.complexity);
6149       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6150                cost.cost, cost.complexity);
6151     }
6152
6153   /* Choose the one with the best cost.  */
6154   if (compare_costs (origcost, cost) <= 0)
6155     {
6156       if (set)
6157         iv_ca_free (&set);
6158       set = origset;
6159     }
6160   else if (origset)
6161     iv_ca_free (&origset);
6162
6163   for (i = 0; i < n_iv_uses (data); i++)
6164     {
6165       use = iv_use (data, i);
6166       use->selected = iv_ca_cand_for_use (set, use)->cand;
6167     }
6168
6169   return set;
6170 }
6171
6172 /* Creates a new induction variable corresponding to CAND.  */
6173
6174 static void
6175 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6176 {
6177   gimple_stmt_iterator incr_pos;
6178   tree base;
6179   bool after = false;
6180
6181   if (!cand->iv)
6182     return;
6183
6184   switch (cand->pos)
6185     {
6186     case IP_NORMAL:
6187       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6188       break;
6189
6190     case IP_END:
6191       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6192       after = true;
6193       break;
6194
6195     case IP_AFTER_USE:
6196       after = true;
6197       /* fall through */
6198     case IP_BEFORE_USE:
6199       incr_pos = gsi_for_stmt (cand->incremented_at);
6200       break;
6201
6202     case IP_ORIGINAL:
6203       /* Mark that the iv is preserved.  */
6204       name_info (data, cand->var_before)->preserve_biv = true;
6205       name_info (data, cand->var_after)->preserve_biv = true;
6206
6207       /* Rewrite the increment so that it uses var_before directly.  */
6208       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6209       return;
6210     }
6211
6212   gimple_add_tmp_var (cand->var_before);
6213
6214   base = unshare_expr (cand->iv->base);
6215
6216   create_iv (base, unshare_expr (cand->iv->step),
6217              cand->var_before, data->current_loop,
6218              &incr_pos, after, &cand->var_before, &cand->var_after);
6219 }
6220
6221 /* Creates new induction variables described in SET.  */
6222
6223 static void
6224 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6225 {
6226   unsigned i;
6227   struct iv_cand *cand;
6228   bitmap_iterator bi;
6229
6230   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6231     {
6232       cand = iv_cand (data, i);
6233       create_new_iv (data, cand);
6234     }
6235
6236   if (dump_file && (dump_flags & TDF_DETAILS))
6237     {
6238       fprintf (dump_file, "\nSelected IV set: \n");
6239       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6240         {
6241           cand = iv_cand (data, i);
6242           dump_cand (dump_file, cand);
6243         }
6244       fprintf (dump_file, "\n");
6245     }
6246 }
6247
6248 /* Rewrites USE (definition of iv used in a nonlinear expression)
6249    using candidate CAND.  */
6250
6251 static void
6252 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6253                             struct iv_use *use, struct iv_cand *cand)
6254 {
6255   tree comp;
6256   tree op, tgt;
6257   gimple ass;
6258   gimple_stmt_iterator bsi;
6259
6260   /* An important special case -- if we are asked to express value of
6261      the original iv by itself, just exit; there is no need to
6262      introduce a new computation (that might also need casting the
6263      variable to unsigned and back).  */
6264   if (cand->pos == IP_ORIGINAL
6265       && cand->incremented_at == use->stmt)
6266     {
6267       enum tree_code stmt_code;
6268
6269       gcc_assert (is_gimple_assign (use->stmt));
6270       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6271
6272       /* Check whether we may leave the computation unchanged.
6273          This is the case only if it does not rely on other
6274          computations in the loop -- otherwise, the computation
6275          we rely upon may be removed in remove_unused_ivs,
6276          thus leading to ICE.  */
6277       stmt_code = gimple_assign_rhs_code (use->stmt);
6278       if (stmt_code == PLUS_EXPR
6279           || stmt_code == MINUS_EXPR
6280           || stmt_code == POINTER_PLUS_EXPR)
6281         {
6282           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6283             op = gimple_assign_rhs2 (use->stmt);
6284           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6285             op = gimple_assign_rhs1 (use->stmt);
6286           else
6287             op = NULL_TREE;
6288         }
6289       else
6290         op = NULL_TREE;
6291
6292       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6293         return;
6294     }
6295
6296   comp = get_computation (data->current_loop, use, cand);
6297   gcc_assert (comp != NULL_TREE);
6298
6299   switch (gimple_code (use->stmt))
6300     {
6301     case GIMPLE_PHI:
6302       tgt = PHI_RESULT (use->stmt);
6303
6304       /* If we should keep the biv, do not replace it.  */
6305       if (name_info (data, tgt)->preserve_biv)
6306         return;
6307
6308       bsi = gsi_after_labels (gimple_bb (use->stmt));
6309       break;
6310
6311     case GIMPLE_ASSIGN:
6312       tgt = gimple_assign_lhs (use->stmt);
6313       bsi = gsi_for_stmt (use->stmt);
6314       break;
6315
6316     default:
6317       gcc_unreachable ();
6318     }
6319
6320   if (!valid_gimple_rhs_p (comp)
6321       || (gimple_code (use->stmt) != GIMPLE_PHI
6322           /* We can't allow re-allocating the stmt as it might be pointed
6323              to still.  */
6324           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6325               >= gimple_num_ops (gsi_stmt (bsi)))))
6326     {
6327       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6328                                        true, GSI_SAME_STMT);
6329       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6330         {
6331           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6332           /* As this isn't a plain copy we have to reset alignment
6333              information.  */
6334           if (SSA_NAME_PTR_INFO (comp))
6335             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6336         }
6337     }
6338
6339   if (gimple_code (use->stmt) == GIMPLE_PHI)
6340     {
6341       ass = gimple_build_assign (tgt, comp);
6342       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6343
6344       bsi = gsi_for_stmt (use->stmt);
6345       remove_phi_node (&bsi, false);
6346     }
6347   else
6348     {
6349       gimple_assign_set_rhs_from_tree (&bsi, comp);
6350       use->stmt = gsi_stmt (bsi);
6351     }
6352 }
6353
6354 /* Performs a peephole optimization to reorder the iv update statement with
6355    a mem ref to enable instruction combining in later phases. The mem ref uses
6356    the iv value before the update, so the reordering transformation requires
6357    adjustment of the offset. CAND is the selected IV_CAND.
6358
6359    Example:
6360
6361    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6362    iv2 = iv1 + 1;
6363
6364    if (t < val)      (1)
6365      goto L;
6366    goto Head;
6367
6368
6369    directly propagating t over to (1) will introduce overlapping live range
6370    thus increase register pressure. This peephole transform it into:
6371
6372
6373    iv2 = iv1 + 1;
6374    t = MEM_REF (base, iv2, 8, 8);
6375    if (t < val)
6376      goto L;
6377    goto Head;
6378 */
6379
6380 static void
6381 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6382 {
6383   tree var_after;
6384   gimple iv_update, stmt;
6385   basic_block bb;
6386   gimple_stmt_iterator gsi, gsi_iv;
6387
6388   if (cand->pos != IP_NORMAL)
6389     return;
6390
6391   var_after = cand->var_after;
6392   iv_update = SSA_NAME_DEF_STMT (var_after);
6393
6394   bb = gimple_bb (iv_update);
6395   gsi = gsi_last_nondebug_bb (bb);
6396   stmt = gsi_stmt (gsi);
6397
6398   /* Only handle conditional statement for now.  */
6399   if (gimple_code (stmt) != GIMPLE_COND)
6400     return;
6401
6402   gsi_prev_nondebug (&gsi);
6403   stmt = gsi_stmt (gsi);
6404   if (stmt != iv_update)
6405     return;
6406
6407   gsi_prev_nondebug (&gsi);
6408   if (gsi_end_p (gsi))
6409     return;
6410
6411   stmt = gsi_stmt (gsi);
6412   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6413     return;
6414
6415   if (stmt != use->stmt)
6416     return;
6417
6418   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6419     return;
6420
6421   if (dump_file && (dump_flags & TDF_DETAILS))
6422     {
6423       fprintf (dump_file, "Reordering \n");
6424       print_gimple_stmt (dump_file, iv_update, 0, 0);
6425       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6426       fprintf (dump_file, "\n");
6427     }
6428
6429   gsi = gsi_for_stmt (use->stmt);
6430   gsi_iv = gsi_for_stmt (iv_update);
6431   gsi_move_before (&gsi_iv, &gsi);
6432
6433   cand->pos = IP_BEFORE_USE;
6434   cand->incremented_at = use->stmt;
6435 }
6436
6437 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6438
6439 static void
6440 rewrite_use_address (struct ivopts_data *data,
6441                      struct iv_use *use, struct iv_cand *cand)
6442 {
6443   aff_tree aff;
6444   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6445   tree base_hint = NULL_TREE;
6446   tree ref, iv;
6447   bool ok;
6448
6449   adjust_iv_update_pos (cand, use);
6450   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6451   gcc_assert (ok);
6452   unshare_aff_combination (&aff);
6453
6454   /* To avoid undefined overflow problems, all IV candidates use unsigned
6455      integer types.  The drawback is that this makes it impossible for
6456      create_mem_ref to distinguish an IV that is based on a memory object
6457      from one that represents simply an offset.
6458
6459      To work around this problem, we pass a hint to create_mem_ref that
6460      indicates which variable (if any) in aff is an IV based on a memory
6461      object.  Note that we only consider the candidate.  If this is not
6462      based on an object, the base of the reference is in some subexpression
6463      of the use -- but these will use pointer types, so they are recognized
6464      by the create_mem_ref heuristics anyway.  */
6465   if (cand->iv->base_object)
6466     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6467
6468   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6469   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6470                         reference_alias_ptr_type (*use->op_p),
6471                         iv, base_hint, data->speed);
6472   copy_ref_info (ref, *use->op_p);
6473   *use->op_p = ref;
6474 }
6475
6476 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6477    candidate CAND.  */
6478
6479 static void
6480 rewrite_use_compare (struct ivopts_data *data,
6481                      struct iv_use *use, struct iv_cand *cand)
6482 {
6483   tree comp, *var_p, op, bound;
6484   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6485   enum tree_code compare;
6486   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6487   bool ok;
6488
6489   bound = cp->value;
6490   if (bound)
6491     {
6492       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6493       tree var_type = TREE_TYPE (var);
6494       gimple_seq stmts;
6495
6496       if (dump_file && (dump_flags & TDF_DETAILS))
6497         {
6498           fprintf (dump_file, "Replacing exit test: ");
6499           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6500         }
6501       compare = cp->comp;
6502       bound = unshare_expr (fold_convert (var_type, bound));
6503       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6504       if (stmts)
6505         gsi_insert_seq_on_edge_immediate (
6506                 loop_preheader_edge (data->current_loop),
6507                 stmts);
6508
6509       gimple_cond_set_lhs (use->stmt, var);
6510       gimple_cond_set_code (use->stmt, compare);
6511       gimple_cond_set_rhs (use->stmt, op);
6512       return;
6513     }
6514
6515   /* The induction variable elimination failed; just express the original
6516      giv.  */
6517   comp = get_computation (data->current_loop, use, cand);
6518   gcc_assert (comp != NULL_TREE);
6519
6520   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6521   gcc_assert (ok);
6522
6523   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6524                                      true, GSI_SAME_STMT);
6525 }
6526
6527 /* Rewrites USE using candidate CAND.  */
6528
6529 static void
6530 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6531 {
6532   switch (use->type)
6533     {
6534       case USE_NONLINEAR_EXPR:
6535         rewrite_use_nonlinear_expr (data, use, cand);
6536         break;
6537
6538       case USE_ADDRESS:
6539         rewrite_use_address (data, use, cand);
6540         break;
6541
6542       case USE_COMPARE:
6543         rewrite_use_compare (data, use, cand);
6544         break;
6545
6546       default:
6547         gcc_unreachable ();
6548     }
6549
6550   update_stmt (use->stmt);
6551 }
6552
6553 /* Rewrite the uses using the selected induction variables.  */
6554
6555 static void
6556 rewrite_uses (struct ivopts_data *data)
6557 {
6558   unsigned i;
6559   struct iv_cand *cand;
6560   struct iv_use *use;
6561
6562   for (i = 0; i < n_iv_uses (data); i++)
6563     {
6564       use = iv_use (data, i);
6565       cand = use->selected;
6566       gcc_assert (cand);
6567
6568       rewrite_use (data, use, cand);
6569     }
6570 }
6571
6572 /* Removes the ivs that are not used after rewriting.  */
6573
6574 static void
6575 remove_unused_ivs (struct ivopts_data *data)
6576 {
6577   unsigned j;
6578   bitmap_iterator bi;
6579   bitmap toremove = BITMAP_ALLOC (NULL);
6580
6581   /* Figure out an order in which to release SSA DEFs so that we don't
6582      release something that we'd have to propagate into a debug stmt
6583      afterwards.  */
6584   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6585     {
6586       struct version_info *info;
6587
6588       info = ver_info (data, j);
6589       if (info->iv
6590           && !integer_zerop (info->iv->step)
6591           && !info->inv_id
6592           && !info->iv->have_use_for
6593           && !info->preserve_biv)
6594         {
6595           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6596
6597           tree def = info->iv->ssa_name;
6598
6599           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6600             {
6601               imm_use_iterator imm_iter;
6602               use_operand_p use_p;
6603               gimple stmt;
6604               int count = 0;
6605
6606               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6607                 {
6608                   if (!gimple_debug_bind_p (stmt))
6609                     continue;
6610
6611                   /* We just want to determine whether to do nothing
6612                      (count == 0), to substitute the computed
6613                      expression into a single use of the SSA DEF by
6614                      itself (count == 1), or to use a debug temp
6615                      because the SSA DEF is used multiple times or as
6616                      part of a larger expression (count > 1). */
6617                   count++;
6618                   if (gimple_debug_bind_get_value (stmt) != def)
6619                     count++;
6620
6621                   if (count > 1)
6622                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6623                 }
6624
6625               if (!count)
6626                 continue;
6627
6628               struct iv_use dummy_use;
6629               struct iv_cand *best_cand = NULL, *cand;
6630               unsigned i, best_pref = 0, cand_pref;
6631
6632               memset (&dummy_use, 0, sizeof (dummy_use));
6633               dummy_use.iv = info->iv;
6634               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6635                 {
6636                   cand = iv_use (data, i)->selected;
6637                   if (cand == best_cand)
6638                     continue;
6639                   cand_pref = operand_equal_p (cand->iv->step,
6640                                                info->iv->step, 0)
6641                     ? 4 : 0;
6642                   cand_pref
6643                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6644                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6645                     ? 2 : 0;
6646                   cand_pref
6647                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6648                     ? 1 : 0;
6649                   if (best_cand == NULL || best_pref < cand_pref)
6650                     {
6651                       best_cand = cand;
6652                       best_pref = cand_pref;
6653                     }
6654                 }
6655
6656               if (!best_cand)
6657                 continue;
6658
6659               tree comp = get_computation_at (data->current_loop,
6660                                               &dummy_use, best_cand,
6661                                               SSA_NAME_DEF_STMT (def));
6662               if (!comp)
6663                 continue;
6664
6665               if (count > 1)
6666                 {
6667                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6668                   DECL_ARTIFICIAL (vexpr) = 1;
6669                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6670                   if (SSA_NAME_VAR (def))
6671                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6672                   else
6673                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6674                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6675                   gimple_stmt_iterator gsi;
6676
6677                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6678                     gsi = gsi_after_labels (gimple_bb
6679                                             (SSA_NAME_DEF_STMT (def)));
6680                   else
6681                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6682
6683                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6684                   comp = vexpr;
6685                 }
6686
6687               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6688                 {
6689                   if (!gimple_debug_bind_p (stmt))
6690                     continue;
6691
6692                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6693                     SET_USE (use_p, comp);
6694
6695                   update_stmt (stmt);
6696                 }
6697             }
6698         }
6699     }
6700
6701   release_defs_bitset (toremove);
6702
6703   BITMAP_FREE (toremove);
6704 }
6705
6706 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6707    for pointer_map_traverse.  */
6708
6709 static bool
6710 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6711                       void *data ATTRIBUTE_UNUSED)
6712 {
6713   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6714
6715   free (niter);
6716   return true;
6717 }
6718
6719 /* Frees data allocated by the optimization of a single loop.  */
6720
6721 static void
6722 free_loop_data (struct ivopts_data *data)
6723 {
6724   unsigned i, j;
6725   bitmap_iterator bi;
6726   tree obj;
6727
6728   if (data->niters)
6729     {
6730       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6731       pointer_map_destroy (data->niters);
6732       data->niters = NULL;
6733     }
6734
6735   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6736     {
6737       struct version_info *info;
6738
6739       info = ver_info (data, i);
6740       free (info->iv);
6741       info->iv = NULL;
6742       info->has_nonlin_use = false;
6743       info->preserve_biv = false;
6744       info->inv_id = 0;
6745     }
6746   bitmap_clear (data->relevant);
6747   bitmap_clear (data->important_candidates);
6748
6749   for (i = 0; i < n_iv_uses (data); i++)
6750     {
6751       struct iv_use *use = iv_use (data, i);
6752
6753       free (use->iv);
6754       BITMAP_FREE (use->related_cands);
6755       for (j = 0; j < use->n_map_members; j++)
6756         if (use->cost_map[j].depends_on)
6757           BITMAP_FREE (use->cost_map[j].depends_on);
6758       free (use->cost_map);
6759       free (use);
6760     }
6761   data->iv_uses.truncate (0);
6762
6763   for (i = 0; i < n_iv_cands (data); i++)
6764     {
6765       struct iv_cand *cand = iv_cand (data, i);
6766
6767       free (cand->iv);
6768       if (cand->depends_on)
6769         BITMAP_FREE (cand->depends_on);
6770       free (cand);
6771     }
6772   data->iv_candidates.truncate (0);
6773
6774   if (data->version_info_size < num_ssa_names)
6775     {
6776       data->version_info_size = 2 * num_ssa_names;
6777       free (data->version_info);
6778       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6779     }
6780
6781   data->max_inv_id = 0;
6782
6783   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6784     SET_DECL_RTL (obj, NULL_RTX);
6785
6786   decl_rtl_to_reset.truncate (0);
6787
6788   data->inv_expr_tab.empty ();
6789   data->inv_expr_id = 0;
6790 }
6791
6792 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6793    loop tree.  */
6794
6795 static void
6796 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6797 {
6798   free_loop_data (data);
6799   free (data->version_info);
6800   BITMAP_FREE (data->relevant);
6801   BITMAP_FREE (data->important_candidates);
6802
6803   decl_rtl_to_reset.release ();
6804   data->iv_uses.release ();
6805   data->iv_candidates.release ();
6806   data->inv_expr_tab.dispose ();
6807 }
6808
6809 /* Returns true if the loop body BODY includes any function calls.  */
6810
6811 static bool
6812 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6813 {
6814   gimple_stmt_iterator gsi;
6815   unsigned i;
6816
6817   for (i = 0; i < num_nodes; i++)
6818     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6819       {
6820         gimple stmt = gsi_stmt (gsi);
6821         if (is_gimple_call (stmt)
6822             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6823           return true;
6824       }
6825   return false;
6826 }
6827
6828 /* Optimizes the LOOP.  Returns true if anything changed.  */
6829
6830 static bool
6831 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6832 {
6833   bool changed = false;
6834   struct iv_ca *iv_ca;
6835   edge exit = single_dom_exit (loop);
6836   basic_block *body;
6837
6838   gcc_assert (!data->niters);
6839   data->current_loop = loop;
6840   data->speed = optimize_loop_for_speed_p (loop);
6841
6842   if (dump_file && (dump_flags & TDF_DETAILS))
6843     {
6844       fprintf (dump_file, "Processing loop %d\n", loop->num);
6845
6846       if (exit)
6847         {
6848           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6849                    exit->src->index, exit->dest->index);
6850           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6851           fprintf (dump_file, "\n");
6852         }
6853
6854       fprintf (dump_file, "\n");
6855     }
6856
6857   body = get_loop_body (loop);
6858   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6859   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6860   free (body);
6861
6862   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6863
6864   /* For each ssa name determines whether it behaves as an induction variable
6865      in some loop.  */
6866   if (!find_induction_variables (data))
6867     goto finish;
6868
6869   /* Finds interesting uses (item 1).  */
6870   find_interesting_uses (data);
6871   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6872     goto finish;
6873
6874   /* Finds candidates for the induction variables (item 2).  */
6875   find_iv_candidates (data);
6876
6877   /* Calculates the costs (item 3, part 1).  */
6878   determine_iv_costs (data);
6879   determine_use_iv_costs (data);
6880   determine_set_costs (data);
6881
6882   /* Find the optimal set of induction variables (item 3, part 2).  */
6883   iv_ca = find_optimal_iv_set (data);
6884   if (!iv_ca)
6885     goto finish;
6886   changed = true;
6887
6888   /* Create the new induction variables (item 4, part 1).  */
6889   create_new_ivs (data, iv_ca);
6890   iv_ca_free (&iv_ca);
6891
6892   /* Rewrite the uses (item 4, part 2).  */
6893   rewrite_uses (data);
6894
6895   /* Remove the ivs that are unused after rewriting.  */
6896   remove_unused_ivs (data);
6897
6898   /* We have changed the structure of induction variables; it might happen
6899      that definitions in the scev database refer to some of them that were
6900      eliminated.  */
6901   scev_reset ();
6902
6903 finish:
6904   free_loop_data (data);
6905
6906   return changed;
6907 }
6908
6909 /* Main entry point.  Optimizes induction variables in loops.  */
6910
6911 void
6912 tree_ssa_iv_optimize (void)
6913 {
6914   struct loop *loop;
6915   struct ivopts_data data;
6916
6917   tree_ssa_iv_optimize_init (&data);
6918
6919   /* Optimize the loops starting with the innermost ones.  */
6920   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6921     {
6922       if (dump_file && (dump_flags & TDF_DETAILS))
6923         flow_loop_dump (loop, dump_file, NULL, 1);
6924
6925       tree_ssa_iv_optimize_loop (&data, loop);
6926     }
6927
6928   tree_ssa_iv_optimize_finalize (&data);
6929 }