gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "backend.h"
  68 #include "cfghooks.h"
  69 #include "tree.h"
  70 #include "gimple.h"
  71 #include "rtl.h"
  72 #include "ssa.h"
  73 #include "alias.h"
  74 #include "fold-const.h"
  75 #include "stor-layout.h"
  76 #include "tm_p.h"
  77 #include "gimple-pretty-print.h"
  78 #include "internal-fn.h"
  79 #include "tree-eh.h"
  80 #include "gimplify.h"
  81 #include "gimple-iterator.h"
  82 #include "gimplify-me.h"
  83 #include "cgraph.h"
  84 #include "tree-cfg.h"
  85 #include "tree-ssa-loop-ivopts.h"
  86 #include "tree-ssa-loop-manip.h"
  87 #include "tree-ssa-loop-niter.h"
  88 #include "tree-ssa-loop.h"
  89 #include "flags.h"
  90 #include "insn-config.h"
  91 #include "expmed.h"
  92 #include "dojump.h"
  93 #include "explow.h"
  94 #include "calls.h"
  95 #include "emit-rtl.h"
  96 #include "varasm.h"
  97 #include "stmt.h"
  98 #include "expr.h"
  99 #include "tree-dfa.h"
 100 #include "tree-ssa.h"
 101 #include "cfgloop.h"
 102 #include "tree-pass.h"
 103 #include "tree-chrec.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "langhooks.h"
 107 #include "tree-affine.h"
 108 #include "target.h"
 109 #include "tree-inline.h"
 110 #include "tree-ssa-propagate.h"
 111 #include "tree-ssa-address.h"
 112 #include "builtins.h"
 113 #include "tree-vectorizer.h"
 114
 115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 116    cost of different addressing modes.  This should be moved to a TBD
 117    interface between the GIMPLE and RTL worlds.  */
 118 #include "recog.h"
 119
 120 /* The infinite cost.  */
 121 #define INFTY 10000000
 122
 123 #define AVG_LOOP_NITER(LOOP) 5
 124
 125 /* Returns the expected number of loop iterations for LOOP.
 126    The average trip count is computed from profile data if it
 127    exists. */
 128
 129 static inline HOST_WIDE_INT
 130 avg_loop_niter (struct loop *loop)
 131 {
 132   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 133   if (niter == -1)
 134     return AVG_LOOP_NITER (loop);
 135
 136   return niter;
 137 }
 138
 139 /* Representation of the induction variable.  */
 140 struct iv
 141 {
 142   tree base;            /* Initial value of the iv.  */
 143   tree base_object;     /* A memory object to that the induction variable points.  */
 144   tree step;            /* Step of the iv (constant only).  */
 145   tree ssa_name;        /* The ssa name with the value.  */
 146   unsigned use_id;      /* The identifier in the use if it is the case.  */
 147   bool biv_p;           /* Is it a biv?  */
 148   bool have_use_for;    /* Do we already have a use for it?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   int cost;             /* The runtime cost.  */
 177   unsigned complexity;  /* The estimate of the complexity of the code for
 178                            the computation (in no concrete units --
 179                            complexity field should be larger for more
 180                            complex expressions and addressing modes).  */
 181 };
 182
 183 static const comp_cost no_cost = {0, 0};
 184 static const comp_cost infinite_cost = {INFTY, INFTY};
 185
 186 /* The candidate - cost pair.  */
 187 struct cost_pair
 188 {
 189   struct iv_cand *cand; /* The candidate.  */
 190   comp_cost cost;       /* The cost.  */
 191   bitmap depends_on;    /* The list of invariants that have to be
 192                            preserved.  */
 193   tree value;           /* For final value elimination, the expression for
 194                            the final value of the iv.  For iv elimination,
 195                            the new bound to compare with.  */
 196   enum tree_code comp;  /* For iv elimination, the comparison.  */
 197   int inv_expr_id;      /* Loop invariant expression id.  */
 198 };
 199
 200 /* Use.  */
 201 struct iv_use
 202 {
 203   unsigned id;          /* The id of the use.  */
 204   unsigned sub_id;      /* The id of the sub use.  */
 205   enum use_type type;   /* Type of the use.  */
 206   struct iv *iv;        /* The induction variable it is based on.  */
 207   gimple *stmt;         /* Statement in that it occurs.  */
 208   tree *op_p;           /* The place where it occurs.  */
 209   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 210                            important ones.  */
 211
 212   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 213   struct cost_pair *cost_map;
 214                         /* The costs wrto the iv candidates.  */
 215
 216   struct iv_cand *selected;
 217                         /* The selected candidate.  */
 218
 219   struct iv_use *next;  /* The next sub use.  */
 220   tree addr_base;       /* Base address with const offset stripped.  */
 221   unsigned HOST_WIDE_INT addr_offset;
 222                         /* Const offset stripped from base address.  */
 223 };
 224
 225 /* The position where the iv is computed.  */
 226 enum iv_position
 227 {
 228   IP_NORMAL,            /* At the end, just before the exit condition.  */
 229   IP_END,               /* At the end of the latch block.  */
 230   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 231   IP_AFTER_USE,         /* Immediately after a specific use.  */
 232   IP_ORIGINAL           /* The original biv.  */
 233 };
 234
 235 /* The induction variable candidate.  */
 236 struct iv_cand
 237 {
 238   unsigned id;          /* The number of the candidate.  */
 239   bool important;       /* Whether this is an "important" candidate, i.e. such
 240                            that it should be considered by all uses.  */
 241   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 242   gimple *incremented_at;/* For original biv, the statement where it is
 243                            incremented.  */
 244   tree var_before;      /* The variable used for it before increment.  */
 245   tree var_after;       /* The variable used for it after increment.  */
 246   struct iv *iv;        /* The value of the candidate.  NULL for
 247                            "pseudocandidate" used to indicate the possibility
 248                            to replace the final value of an iv by direct
 249                            computation of the value.  */
 250   unsigned cost;        /* Cost of the candidate.  */
 251   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 252   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 253                               where it is incremented.  */
 254   bitmap depends_on;    /* The list of invariants that are used in step of the
 255                            biv.  */
 256   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 257                            smaller type.  */
 258 };
 259
 260 /* Loop invariant expression hashtable entry.  */
 261 struct iv_inv_expr_ent
 262 {
 263   tree expr;
 264   int id;
 265   hashval_t hash;
 266 };
 267
 268 /* The data used by the induction variable optimizations.  */
 269
 270 /* Hashtable helpers.  */
 271
 272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 273 {
 274   static inline hashval_t hash (const iv_inv_expr_ent *);
 275   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 276 };
 277
 278 /* Hash function for loop invariant expressions.  */
 279
 280 inline hashval_t
 281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 282 {
 283   return expr->hash;
 284 }
 285
 286 /* Hash table equality function for expressions.  */
 287
 288 inline bool
 289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 290                            const iv_inv_expr_ent *expr2)
 291 {
 292   return expr1->hash == expr2->hash
 293          && operand_equal_p (expr1->expr, expr2->expr, 0);
 294 }
 295
 296 struct ivopts_data
 297 {
 298   /* The currently optimized loop.  */
 299   struct loop *current_loop;
 300   source_location loop_loc;
 301
 302   /* Numbers of iterations for all exits of the current loop.  */
 303   hash_map<edge, tree_niter_desc *> *niters;
 304
 305   /* Number of registers used in it.  */
 306   unsigned regs_used;
 307
 308   /* The size of version_info array allocated.  */
 309   unsigned version_info_size;
 310
 311   /* The array of information for the ssa names.  */
 312   struct version_info *version_info;
 313
 314   /* The hashtable of loop invariant expressions created
 315      by ivopt.  */
 316   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 317
 318   /* Loop invariant expression id.  */
 319   int inv_expr_id;
 320
 321   /* The bitmap of indices in version_info whose value was changed.  */
 322   bitmap relevant;
 323
 324   /* The uses of induction variables.  */
 325   vec<iv_use *> iv_uses;
 326
 327   /* The candidates.  */
 328   vec<iv_cand *> iv_candidates;
 329
 330   /* A bitmap of important candidates.  */
 331   bitmap important_candidates;
 332
 333   /* Cache used by tree_to_aff_combination_expand.  */
 334   hash_map<tree, name_expansion *> *name_expansion_cache;
 335
 336   /* The maximum invariant id.  */
 337   unsigned max_inv_id;
 338
 339   /* Number of no_overflow BIVs which are not used in memory address.  */
 340   unsigned bivs_not_used_in_addr;
 341
 342   /* Obstack for iv structure.  */
 343   struct obstack iv_obstack;
 344
 345   /* Whether to consider just related and important candidates when replacing a
 346      use.  */
 347   bool consider_all_candidates;
 348
 349   /* Are we optimizing for speed?  */
 350   bool speed;
 351
 352   /* Whether the loop body includes any function calls.  */
 353   bool body_includes_call;
 354
 355   /* Whether the loop body can only be exited via single exit.  */
 356   bool loop_single_exit_p;
 357 };
 358
 359 /* An assignment of iv candidates to uses.  */
 360
 361 struct iv_ca
 362 {
 363   /* The number of uses covered by the assignment.  */
 364   unsigned upto;
 365
 366   /* Number of uses that cannot be expressed by the candidates in the set.  */
 367   unsigned bad_uses;
 368
 369   /* Candidate assigned to a use, together with the related costs.  */
 370   struct cost_pair **cand_for_use;
 371
 372   /* Number of times each candidate is used.  */
 373   unsigned *n_cand_uses;
 374
 375   /* The candidates used.  */
 376   bitmap cands;
 377
 378   /* The number of candidates in the set.  */
 379   unsigned n_cands;
 380
 381   /* Total number of registers needed.  */
 382   unsigned n_regs;
 383
 384   /* Total cost of expressing uses.  */
 385   comp_cost cand_use_cost;
 386
 387   /* Total cost of candidates.  */
 388   unsigned cand_cost;
 389
 390   /* Number of times each invariant is used.  */
 391   unsigned *n_invariant_uses;
 392
 393   /* The array holding the number of uses of each loop
 394      invariant expressions created by ivopt.  */
 395   unsigned *used_inv_expr;
 396
 397   /* The number of created loop invariants.  */
 398   unsigned num_used_inv_expr;
 399
 400   /* Total cost of the assignment.  */
 401   comp_cost cost;
 402 };
 403
 404 /* Difference of two iv candidate assignments.  */
 405
 406 struct iv_ca_delta
 407 {
 408   /* Changed use.  */
 409   struct iv_use *use;
 410
 411   /* An old assignment (for rollback purposes).  */
 412   struct cost_pair *old_cp;
 413
 414   /* A new assignment.  */
 415   struct cost_pair *new_cp;
 416
 417   /* Next change in the list.  */
 418   struct iv_ca_delta *next_change;
 419 };
 420
 421 /* Bound on number of candidates below that all candidates are considered.  */
 422
 423 #define CONSIDER_ALL_CANDIDATES_BOUND \
 424   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 425
 426 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 427    optimizing such a loop would help, and it would take ages).  */
 428
 429 #define MAX_CONSIDERED_USES \
 430   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 431
 432 /* If there are at most this number of ivs in the set, try removing unnecessary
 433    ivs from the set always.  */
 434
 435 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 436   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 437
 438 /* The list of trees for that the decl_rtl field must be reset is stored
 439    here.  */
 440
 441 static vec<tree> decl_rtl_to_reset;
 442
 443 static comp_cost force_expr_to_var_cost (tree, bool);
 444
 445 /* Number of uses recorded in DATA.  */
 446
 447 static inline unsigned
 448 n_iv_uses (struct ivopts_data *data)
 449 {
 450   return data->iv_uses.length ();
 451 }
 452
 453 /* Ith use recorded in DATA.  */
 454
 455 static inline struct iv_use *
 456 iv_use (struct ivopts_data *data, unsigned i)
 457 {
 458   return data->iv_uses[i];
 459 }
 460
 461 /* Number of candidates recorded in DATA.  */
 462
 463 static inline unsigned
 464 n_iv_cands (struct ivopts_data *data)
 465 {
 466   return data->iv_candidates.length ();
 467 }
 468
 469 /* Ith candidate recorded in DATA.  */
 470
 471 static inline struct iv_cand *
 472 iv_cand (struct ivopts_data *data, unsigned i)
 473 {
 474   return data->iv_candidates[i];
 475 }
 476
 477 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 478
 479 edge
 480 single_dom_exit (struct loop *loop)
 481 {
 482   edge exit = single_exit (loop);
 483
 484   if (!exit)
 485     return NULL;
 486
 487   if (!just_once_each_iteration_p (loop, exit->src))
 488     return NULL;
 489
 490   return exit;
 491 }
 492
 493 /* Dumps information about the induction variable IV to FILE.  */
 494
 495 void
 496 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 497 {
 498   if (iv->ssa_name && dump_name)
 499     {
 500       fprintf (file, "ssa name ");
 501       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 502       fprintf (file, "\n");
 503     }
 504
 505   fprintf (file, "  type ");
 506   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 507   fprintf (file, "\n");
 508
 509   if (iv->step)
 510     {
 511       fprintf (file, "  base ");
 512       print_generic_expr (file, iv->base, TDF_SLIM);
 513       fprintf (file, "\n");
 514
 515       fprintf (file, "  step ");
 516       print_generic_expr (file, iv->step, TDF_SLIM);
 517       fprintf (file, "\n");
 518     }
 519   else
 520     {
 521       fprintf (file, "  invariant ");
 522       print_generic_expr (file, iv->base, TDF_SLIM);
 523       fprintf (file, "\n");
 524     }
 525
 526   if (iv->base_object)
 527     {
 528       fprintf (file, "  base object ");
 529       print_generic_expr (file, iv->base_object, TDF_SLIM);
 530       fprintf (file, "\n");
 531     }
 532
 533   if (iv->biv_p)
 534     fprintf (file, "  is a biv\n");
 535
 536   if (iv->no_overflow)
 537     fprintf (file, "  iv doesn't overflow wrto loop niter\n");
 538 }
 539
 540 /* Dumps information about the USE to FILE.  */
 541
 542 void
 543 dump_use (FILE *file, struct iv_use *use)
 544 {
 545   fprintf (file, "use %d", use->id);
 546   if (use->sub_id)
 547     fprintf (file, ".%d", use->sub_id);
 548
 549   fprintf (file, "\n");
 550
 551   switch (use->type)
 552     {
 553     case USE_NONLINEAR_EXPR:
 554       fprintf (file, "  generic\n");
 555       break;
 556
 557     case USE_ADDRESS:
 558       fprintf (file, "  address\n");
 559       break;
 560
 561     case USE_COMPARE:
 562       fprintf (file, "  compare\n");
 563       break;
 564
 565     default:
 566       gcc_unreachable ();
 567     }
 568
 569   fprintf (file, "  in statement ");
 570   print_gimple_stmt (file, use->stmt, 0, 0);
 571   fprintf (file, "\n");
 572
 573   fprintf (file, "  at position ");
 574   if (use->op_p)
 575     print_generic_expr (file, *use->op_p, TDF_SLIM);
 576   fprintf (file, "\n");
 577
 578   dump_iv (file, use->iv, false);
 579
 580   if (use->related_cands)
 581     {
 582       fprintf (file, "  related candidates ");
 583       dump_bitmap (file, use->related_cands);
 584     }
 585 }
 586
 587 /* Dumps information about the uses to FILE.  */
 588
 589 void
 590 dump_uses (FILE *file, struct ivopts_data *data)
 591 {
 592   unsigned i;
 593   struct iv_use *use;
 594
 595   for (i = 0; i < n_iv_uses (data); i++)
 596     {
 597       use = iv_use (data, i);
 598       do
 599         {
 600           dump_use (file, use);
 601           use = use->next;
 602         }
 603       while (use);
 604       fprintf (file, "\n");
 605     }
 606 }
 607
 608 /* Dumps information about induction variable candidate CAND to FILE.  */
 609
 610 void
 611 dump_cand (FILE *file, struct iv_cand *cand)
 612 {
 613   struct iv *iv = cand->iv;
 614
 615   fprintf (file, "candidate %d%s\n",
 616            cand->id, cand->important ? " (important)" : "");
 617
 618   if (cand->depends_on)
 619     {
 620       fprintf (file, "  depends on ");
 621       dump_bitmap (file, cand->depends_on);
 622     }
 623
 624   if (!iv)
 625     {
 626       fprintf (file, "  final value replacement\n");
 627       return;
 628     }
 629
 630   if (cand->var_before)
 631     {
 632       fprintf (file, "  var_before ");
 633       print_generic_expr (file, cand->var_before, TDF_SLIM);
 634       fprintf (file, "\n");
 635     }
 636   if (cand->var_after)
 637     {
 638       fprintf (file, "  var_after ");
 639       print_generic_expr (file, cand->var_after, TDF_SLIM);
 640       fprintf (file, "\n");
 641     }
 642
 643   switch (cand->pos)
 644     {
 645     case IP_NORMAL:
 646       fprintf (file, "  incremented before exit test\n");
 647       break;
 648
 649     case IP_BEFORE_USE:
 650       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 651       break;
 652
 653     case IP_AFTER_USE:
 654       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 655       break;
 656
 657     case IP_END:
 658       fprintf (file, "  incremented at end\n");
 659       break;
 660
 661     case IP_ORIGINAL:
 662       fprintf (file, "  original biv\n");
 663       break;
 664     }
 665
 666   dump_iv (file, iv, false);
 667 }
 668
 669 /* Returns the info for ssa version VER.  */
 670
 671 static inline struct version_info *
 672 ver_info (struct ivopts_data *data, unsigned ver)
 673 {
 674   return data->version_info + ver;
 675 }
 676
 677 /* Returns the info for ssa name NAME.  */
 678
 679 static inline struct version_info *
 680 name_info (struct ivopts_data *data, tree name)
 681 {
 682   return ver_info (data, SSA_NAME_VERSION (name));
 683 }
 684
 685 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 686    emitted in LOOP.  */
 687
 688 static bool
 689 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 690 {
 691   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 692
 693   gcc_assert (bb);
 694
 695   if (sbb == loop->latch)
 696     return true;
 697
 698   if (sbb != bb)
 699     return false;
 700
 701   return stmt == last_stmt (bb);
 702 }
 703
 704 /* Returns true if STMT if after the place where the original induction
 705    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 706    if the positions are identical.  */
 707
 708 static bool
 709 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 710 {
 711   basic_block cand_bb = gimple_bb (cand->incremented_at);
 712   basic_block stmt_bb = gimple_bb (stmt);
 713
 714   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 715     return false;
 716
 717   if (stmt_bb != cand_bb)
 718     return true;
 719
 720   if (true_if_equal
 721       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 722     return true;
 723   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 724 }
 725
 726 /* Returns true if STMT if after the place where the induction variable
 727    CAND is incremented in LOOP.  */
 728
 729 static bool
 730 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 731 {
 732   switch (cand->pos)
 733     {
 734     case IP_END:
 735       return false;
 736
 737     case IP_NORMAL:
 738       return stmt_after_ip_normal_pos (loop, stmt);
 739
 740     case IP_ORIGINAL:
 741     case IP_AFTER_USE:
 742       return stmt_after_inc_pos (cand, stmt, false);
 743
 744     case IP_BEFORE_USE:
 745       return stmt_after_inc_pos (cand, stmt, true);
 746
 747     default:
 748       gcc_unreachable ();
 749     }
 750 }
 751
 752 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 753
 754 static bool
 755 abnormal_ssa_name_p (tree exp)
 756 {
 757   if (!exp)
 758     return false;
 759
 760   if (TREE_CODE (exp) != SSA_NAME)
 761     return false;
 762
 763   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 764 }
 765
 766 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 767    abnormal phi node.  Callback for for_each_index.  */
 768
 769 static bool
 770 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 771                                   void *data ATTRIBUTE_UNUSED)
 772 {
 773   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 774     {
 775       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 776         return false;
 777       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 778         return false;
 779     }
 780
 781   return !abnormal_ssa_name_p (*index);
 782 }
 783
 784 /* Returns true if EXPR contains a ssa name that occurs in an
 785    abnormal phi node.  */
 786
 787 bool
 788 contains_abnormal_ssa_name_p (tree expr)
 789 {
 790   enum tree_code code;
 791   enum tree_code_class codeclass;
 792
 793   if (!expr)
 794     return false;
 795
 796   code = TREE_CODE (expr);
 797   codeclass = TREE_CODE_CLASS (code);
 798
 799   if (code == SSA_NAME)
 800     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 801
 802   if (code == INTEGER_CST
 803       || is_gimple_min_invariant (expr))
 804     return false;
 805
 806   if (code == ADDR_EXPR)
 807     return !for_each_index (&TREE_OPERAND (expr, 0),
 808                             idx_contains_abnormal_ssa_name_p,
 809                             NULL);
 810
 811   if (code == COND_EXPR)
 812     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 813       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 814       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 815
 816   switch (codeclass)
 817     {
 818     case tcc_binary:
 819     case tcc_comparison:
 820       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 821         return true;
 822
 823       /* Fallthru.  */
 824     case tcc_unary:
 825       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 826         return true;
 827
 828       break;
 829
 830     default:
 831       gcc_unreachable ();
 832     }
 833
 834   return false;
 835 }
 836
 837 /*  Returns the structure describing number of iterations determined from
 838     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 839
 840 static struct tree_niter_desc *
 841 niter_for_exit (struct ivopts_data *data, edge exit)
 842 {
 843   struct tree_niter_desc *desc;
 844   tree_niter_desc **slot;
 845
 846   if (!data->niters)
 847     {
 848       data->niters = new hash_map<edge, tree_niter_desc *>;
 849       slot = NULL;
 850     }
 851   else
 852     slot = data->niters->get (exit);
 853
 854   if (!slot)
 855     {
 856       /* Try to determine number of iterations.  We cannot safely work with ssa
 857          names that appear in phi nodes on abnormal edges, so that we do not
 858          create overlapping life ranges for them (PR 27283).  */
 859       desc = XNEW (struct tree_niter_desc);
 860       if (!number_of_iterations_exit (data->current_loop,
 861                                       exit, desc, true)
 862           || contains_abnormal_ssa_name_p (desc->niter))
 863         {
 864           XDELETE (desc);
 865           desc = NULL;
 866         }
 867       data->niters->put (exit, desc);
 868     }
 869   else
 870     desc = *slot;
 871
 872   return desc;
 873 }
 874
 875 /* Returns the structure describing number of iterations determined from
 876    single dominating exit of DATA->current_loop, or NULL if something
 877    goes wrong.  */
 878
 879 static struct tree_niter_desc *
 880 niter_for_single_dom_exit (struct ivopts_data *data)
 881 {
 882   edge exit = single_dom_exit (data->current_loop);
 883
 884   if (!exit)
 885     return NULL;
 886
 887   return niter_for_exit (data, exit);
 888 }
 889
 890 /* Initializes data structures used by the iv optimization pass, stored
 891    in DATA.  */
 892
 893 static void
 894 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 895 {
 896   data->version_info_size = 2 * num_ssa_names;
 897   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 898   data->relevant = BITMAP_ALLOC (NULL);
 899   data->important_candidates = BITMAP_ALLOC (NULL);
 900   data->max_inv_id = 0;
 901   data->niters = NULL;
 902   data->iv_uses.create (20);
 903   data->iv_candidates.create (20);
 904   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 905   data->inv_expr_id = 0;
 906   data->name_expansion_cache = NULL;
 907   decl_rtl_to_reset.create (20);
 908   gcc_obstack_init (&data->iv_obstack);
 909 }
 910
 911 /* Returns a memory object to that EXPR points.  In case we are able to
 912    determine that it does not point to any such object, NULL is returned.  */
 913
 914 static tree
 915 determine_base_object (tree expr)
 916 {
 917   enum tree_code code = TREE_CODE (expr);
 918   tree base, obj;
 919
 920   /* If this is a pointer casted to any type, we need to determine
 921      the base object for the pointer; so handle conversions before
 922      throwing away non-pointer expressions.  */
 923   if (CONVERT_EXPR_P (expr))
 924     return determine_base_object (TREE_OPERAND (expr, 0));
 925
 926   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 927     return NULL_TREE;
 928
 929   switch (code)
 930     {
 931     case INTEGER_CST:
 932       return NULL_TREE;
 933
 934     case ADDR_EXPR:
 935       obj = TREE_OPERAND (expr, 0);
 936       base = get_base_address (obj);
 937
 938       if (!base)
 939         return expr;
 940
 941       if (TREE_CODE (base) == MEM_REF)
 942         return determine_base_object (TREE_OPERAND (base, 0));
 943
 944       return fold_convert (ptr_type_node,
 945                            build_fold_addr_expr (base));
 946
 947     case POINTER_PLUS_EXPR:
 948       return determine_base_object (TREE_OPERAND (expr, 0));
 949
 950     case PLUS_EXPR:
 951     case MINUS_EXPR:
 952       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 953       gcc_unreachable ();
 954
 955     default:
 956       return fold_convert (ptr_type_node, expr);
 957     }
 958 }
 959
 960 /* Return true if address expression with non-DECL_P operand appears
 961    in EXPR.  */
 962
 963 static bool
 964 contain_complex_addr_expr (tree expr)
 965 {
 966   bool res = false;
 967
 968   STRIP_NOPS (expr);
 969   switch (TREE_CODE (expr))
 970     {
 971     case POINTER_PLUS_EXPR:
 972     case PLUS_EXPR:
 973     case MINUS_EXPR:
 974       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 975       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 976       break;
 977
 978     case ADDR_EXPR:
 979       return (!DECL_P (TREE_OPERAND (expr, 0)));
 980
 981     default:
 982       return false;
 983     }
 984
 985   return res;
 986 }
 987
 988 /* Allocates an induction variable with given initial value BASE and step STEP
 989    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 990
 991 static struct iv *
 992 alloc_iv (struct ivopts_data *data, tree base, tree step,
 993           bool no_overflow = false)
 994 {
 995   tree expr = base;
 996   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
 997                                               sizeof (struct iv));
 998   gcc_assert (step != NULL_TREE);
 999
1000   /* Lower address expression in base except ones with DECL_P as operand.
1001      By doing this:
1002        1) More accurate cost can be computed for address expressions;
1003        2) Duplicate candidates won't be created for bases in different
1004           forms, like &a[0] and &a.  */
1005   STRIP_NOPS (expr);
1006   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1007       || contain_complex_addr_expr (expr))
1008     {
1009       aff_tree comb;
1010       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1011       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1012     }
1013
1014   iv->base = base;
1015   iv->base_object = determine_base_object (base);
1016   iv->step = step;
1017   iv->biv_p = false;
1018   iv->have_use_for = false;
1019   iv->use_id = 0;
1020   iv->ssa_name = NULL_TREE;
1021   iv->no_overflow = no_overflow;
1022   iv->have_address_use = false;
1023
1024   return iv;
1025 }
1026
1027 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1028    doesn't overflow.  */
1029
1030 static void
1031 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1032         bool no_overflow)
1033 {
1034   struct version_info *info = name_info (data, iv);
1035
1036   gcc_assert (!info->iv);
1037
1038   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1039   info->iv = alloc_iv (data, base, step, no_overflow);
1040   info->iv->ssa_name = iv;
1041 }
1042
1043 /* Finds induction variable declaration for VAR.  */
1044
1045 static struct iv *
1046 get_iv (struct ivopts_data *data, tree var)
1047 {
1048   basic_block bb;
1049   tree type = TREE_TYPE (var);
1050
1051   if (!POINTER_TYPE_P (type)
1052       && !INTEGRAL_TYPE_P (type))
1053     return NULL;
1054
1055   if (!name_info (data, var)->iv)
1056     {
1057       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1058
1059       if (!bb
1060           || !flow_bb_inside_loop_p (data->current_loop, bb))
1061         set_iv (data, var, var, build_int_cst (type, 0), true);
1062     }
1063
1064   return name_info (data, var)->iv;
1065 }
1066
1067 /* Return the first non-invariant ssa var found in EXPR.  */
1068
1069 static tree
1070 extract_single_var_from_expr (tree expr)
1071 {
1072   int i, n;
1073   tree tmp;
1074   enum tree_code code;
1075
1076   if (!expr || is_gimple_min_invariant (expr))
1077     return NULL;
1078
1079   code = TREE_CODE (expr);
1080   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1081     {
1082       n = TREE_OPERAND_LENGTH (expr);
1083       for (i = 0; i < n; i++)
1084         {
1085           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1086
1087           if (tmp)
1088             return tmp;
1089         }
1090     }
1091   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1092 }
1093
1094 /* Finds basic ivs.  */
1095
1096 static bool
1097 find_bivs (struct ivopts_data *data)
1098 {
1099   gphi *phi;
1100   affine_iv iv;
1101   tree step, type, base, stop;
1102   bool found = false;
1103   struct loop *loop = data->current_loop;
1104   gphi_iterator psi;
1105
1106   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1107     {
1108       phi = psi.phi ();
1109
1110       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1111         continue;
1112
1113       if (virtual_operand_p (PHI_RESULT (phi)))
1114         continue;
1115
1116       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1117         continue;
1118
1119       if (integer_zerop (iv.step))
1120         continue;
1121
1122       step = iv.step;
1123       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1124       /* Stop expanding iv base at the first ssa var referred by iv step.
1125          Ideally we should stop at any ssa var, because that's expensive
1126          and unusual to happen, we just do it on the first one.
1127
1128          See PR64705 for the rationale.  */
1129       stop = extract_single_var_from_expr (step);
1130       base = expand_simple_operations (base, stop);
1131       if (contains_abnormal_ssa_name_p (base)
1132           || contains_abnormal_ssa_name_p (step))
1133         continue;
1134
1135       type = TREE_TYPE (PHI_RESULT (phi));
1136       base = fold_convert (type, base);
1137       if (step)
1138         {
1139           if (POINTER_TYPE_P (type))
1140             step = convert_to_ptrofftype (step);
1141           else
1142             step = fold_convert (type, step);
1143         }
1144
1145       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1146       found = true;
1147     }
1148
1149   return found;
1150 }
1151
1152 /* Marks basic ivs.  */
1153
1154 static void
1155 mark_bivs (struct ivopts_data *data)
1156 {
1157   gphi *phi;
1158   gimple *def;
1159   tree var;
1160   struct iv *iv, *incr_iv;
1161   struct loop *loop = data->current_loop;
1162   basic_block incr_bb;
1163   gphi_iterator psi;
1164
1165   data->bivs_not_used_in_addr = 0;
1166   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1167     {
1168       phi = psi.phi ();
1169
1170       iv = get_iv (data, PHI_RESULT (phi));
1171       if (!iv)
1172         continue;
1173
1174       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1175       def = SSA_NAME_DEF_STMT (var);
1176       /* Don't mark iv peeled from other one as biv.  */
1177       if (def
1178           && gimple_code (def) == GIMPLE_PHI
1179           && gimple_bb (def) == loop->header)
1180         continue;
1181
1182       incr_iv = get_iv (data, var);
1183       if (!incr_iv)
1184         continue;
1185
1186       /* If the increment is in the subloop, ignore it.  */
1187       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1188       if (incr_bb->loop_father != data->current_loop
1189           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1190         continue;
1191
1192       iv->biv_p = true;
1193       incr_iv->biv_p = true;
1194       if (iv->no_overflow)
1195         data->bivs_not_used_in_addr++;
1196       if (incr_iv->no_overflow)
1197         data->bivs_not_used_in_addr++;
1198     }
1199 }
1200
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202    parameters to IV.  */
1203
1204 static bool
1205 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1206 {
1207   tree lhs, stop;
1208   struct loop *loop = data->current_loop;
1209
1210   iv->base = NULL_TREE;
1211   iv->step = NULL_TREE;
1212
1213   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1214     return false;
1215
1216   lhs = gimple_assign_lhs (stmt);
1217   if (TREE_CODE (lhs) != SSA_NAME)
1218     return false;
1219
1220   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1221     return false;
1222
1223   /* Stop expanding iv base at the first ssa var referred by iv step.
1224      Ideally we should stop at any ssa var, because that's expensive
1225      and unusual to happen, we just do it on the first one.
1226
1227      See PR64705 for the rationale.  */
1228   stop = extract_single_var_from_expr (iv->step);
1229   iv->base = expand_simple_operations (iv->base, stop);
1230   if (contains_abnormal_ssa_name_p (iv->base)
1231       || contains_abnormal_ssa_name_p (iv->step))
1232     return false;
1233
1234   /* If STMT could throw, then do not consider STMT as defining a GIV.
1235      While this will suppress optimizations, we can not safely delete this
1236      GIV and associated statements, even if it appears it is not used.  */
1237   if (stmt_could_throw_p (stmt))
1238     return false;
1239
1240   return true;
1241 }
1242
1243 /* Finds general ivs in statement STMT.  */
1244
1245 static void
1246 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1247 {
1248   affine_iv iv;
1249
1250   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1251     return;
1252
1253   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1254 }
1255
1256 /* Finds general ivs in basic block BB.  */
1257
1258 static void
1259 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1260 {
1261   gimple_stmt_iterator bsi;
1262
1263   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1264     find_givs_in_stmt (data, gsi_stmt (bsi));
1265 }
1266
1267 /* Finds general ivs.  */
1268
1269 static void
1270 find_givs (struct ivopts_data *data)
1271 {
1272   struct loop *loop = data->current_loop;
1273   basic_block *body = get_loop_body_in_dom_order (loop);
1274   unsigned i;
1275
1276   for (i = 0; i < loop->num_nodes; i++)
1277     find_givs_in_bb (data, body[i]);
1278   free (body);
1279 }
1280
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282    variable and if so, its initial value and step.  */
1283
1284 static bool
1285 find_induction_variables (struct ivopts_data *data)
1286 {
1287   unsigned i;
1288   bitmap_iterator bi;
1289
1290   if (!find_bivs (data))
1291     return false;
1292
1293   find_givs (data);
1294   mark_bivs (data);
1295
1296   if (dump_file && (dump_flags & TDF_DETAILS))
1297     {
1298       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1299
1300       if (niter)
1301         {
1302           fprintf (dump_file, "  number of iterations ");
1303           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1304           if (!integer_zerop (niter->may_be_zero))
1305             {
1306               fprintf (dump_file, "; zero if ");
1307               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1308             }
1309           fprintf (dump_file, "\n\n");
1310         };
1311
1312       fprintf (dump_file, "Induction variables:\n\n");
1313
1314       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1315         {
1316           if (ver_info (data, i)->iv)
1317             dump_iv (dump_file, ver_info (data, i)->iv, true);
1318         }
1319     }
1320
1321   return true;
1322 }
1323
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326    is the const offset stripped from IV base.  For uses of other types,
1327    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1328
1329 static struct iv_use *
1330 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1331             gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1332             unsigned HOST_WIDE_INT addr_offset = 0)
1333 {
1334   struct iv_use *use = XCNEW (struct iv_use);
1335
1336   use->id = n_iv_uses (data);
1337   use->sub_id = 0;
1338   use->type = use_type;
1339   use->iv = iv;
1340   use->stmt = stmt;
1341   use->op_p = use_p;
1342   use->related_cands = BITMAP_ALLOC (NULL);
1343   use->next = NULL;
1344   use->addr_base = addr_base;
1345   use->addr_offset = addr_offset;
1346
1347   data->iv_uses.safe_push (use);
1348
1349   return use;
1350 }
1351
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353    The sub use is recorded under the one whose use id is ID_GROUP.  */
1354
1355 static struct iv_use *
1356 record_sub_use (struct ivopts_data *data, tree *use_p,
1357                     struct iv *iv, gimple *stmt, enum use_type use_type,
1358                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1359                     unsigned int id_group)
1360 {
1361   struct iv_use *use = XCNEW (struct iv_use);
1362   struct iv_use *group = iv_use (data, id_group);
1363
1364   use->id = group->id;
1365   use->sub_id = 0;
1366   use->type = use_type;
1367   use->iv = iv;
1368   use->stmt = stmt;
1369   use->op_p = use_p;
1370   use->related_cands = NULL;
1371   use->addr_base = addr_base;
1372   use->addr_offset = addr_offset;
1373
1374   /* Sub use list is maintained in offset ascending order.  */
1375   if (addr_offset <= group->addr_offset)
1376     {
1377       use->related_cands = group->related_cands;
1378       group->related_cands = NULL;
1379       use->next = group;
1380       data->iv_uses[id_group] = use;
1381     }
1382   else
1383     {
1384       struct iv_use *pre;
1385       do
1386         {
1387           pre = group;
1388           group = group->next;
1389         }
1390       while (group && addr_offset > group->addr_offset);
1391       use->next = pre->next;
1392       pre->next = use;
1393     }
1394
1395   return use;
1396 }
1397
1398 /* Checks whether OP is a loop-level invariant and if so, records it.
1399    NONLINEAR_USE is true if the invariant is used in a way we do not
1400    handle specially.  */
1401
1402 static void
1403 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1404 {
1405   basic_block bb;
1406   struct version_info *info;
1407
1408   if (TREE_CODE (op) != SSA_NAME
1409       || virtual_operand_p (op))
1410     return;
1411
1412   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1413   if (bb
1414       && flow_bb_inside_loop_p (data->current_loop, bb))
1415     return;
1416
1417   info = name_info (data, op);
1418   info->name = op;
1419   info->has_nonlin_use |= nonlinear_use;
1420   if (!info->inv_id)
1421     info->inv_id = ++data->max_inv_id;
1422   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1423 }
1424
1425 /* Checks whether the use OP is interesting and if so, records it.  */
1426
1427 static struct iv_use *
1428 find_interesting_uses_op (struct ivopts_data *data, tree op)
1429 {
1430   struct iv *iv;
1431   gimple *stmt;
1432   struct iv_use *use;
1433
1434   if (TREE_CODE (op) != SSA_NAME)
1435     return NULL;
1436
1437   iv = get_iv (data, op);
1438   if (!iv)
1439     return NULL;
1440
1441   if (iv->have_use_for)
1442     {
1443       use = iv_use (data, iv->use_id);
1444
1445       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1446       return use;
1447     }
1448
1449   if (integer_zerop (iv->step))
1450     {
1451       record_invariant (data, op, true);
1452       return NULL;
1453     }
1454   iv->have_use_for = true;
1455
1456   stmt = SSA_NAME_DEF_STMT (op);
1457   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1458               || is_gimple_assign (stmt));
1459
1460   use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1461   iv->use_id = use->id;
1462
1463   return use;
1464 }
1465
1466 /* Given a condition in statement STMT, checks whether it is a compare
1467    of an induction variable and an invariant.  If this is the case,
1468    CONTROL_VAR is set to location of the iv, BOUND to the location of
1469    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1470    induction variable descriptions, and true is returned.  If this is not
1471    the case, CONTROL_VAR and BOUND are set to the arguments of the
1472    condition and false is returned.  */
1473
1474 static bool
1475 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1476                        tree **control_var, tree **bound,
1477                        struct iv **iv_var, struct iv **iv_bound)
1478 {
1479   /* The objects returned when COND has constant operands.  */
1480   static struct iv const_iv;
1481   static tree zero;
1482   tree *op0 = &zero, *op1 = &zero;
1483   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1484   bool ret = false;
1485
1486   if (gimple_code (stmt) == GIMPLE_COND)
1487     {
1488       gcond *cond_stmt = as_a <gcond *> (stmt);
1489       op0 = gimple_cond_lhs_ptr (cond_stmt);
1490       op1 = gimple_cond_rhs_ptr (cond_stmt);
1491     }
1492   else
1493     {
1494       op0 = gimple_assign_rhs1_ptr (stmt);
1495       op1 = gimple_assign_rhs2_ptr (stmt);
1496     }
1497
1498   zero = integer_zero_node;
1499   const_iv.step = integer_zero_node;
1500
1501   if (TREE_CODE (*op0) == SSA_NAME)
1502     iv0 = get_iv (data, *op0);
1503   if (TREE_CODE (*op1) == SSA_NAME)
1504     iv1 = get_iv (data, *op1);
1505
1506   /* Exactly one of the compared values must be an iv, and the other one must
1507      be an invariant.  */
1508   if (!iv0 || !iv1)
1509     goto end;
1510
1511   if (integer_zerop (iv0->step))
1512     {
1513       /* Control variable may be on the other side.  */
1514       std::swap (op0, op1);
1515       std::swap (iv0, iv1);
1516     }
1517   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1518
1519 end:
1520   if (control_var)
1521     *control_var = op0;
1522   if (iv_var)
1523     *iv_var = iv0;
1524   if (bound)
1525     *bound = op1;
1526   if (iv_bound)
1527     *iv_bound = iv1;
1528
1529   return ret;
1530 }
1531
1532 /* Checks whether the condition in STMT is interesting and if so,
1533    records it.  */
1534
1535 static void
1536 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1537 {
1538   tree *var_p, *bound_p;
1539   struct iv *var_iv;
1540
1541   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1542     {
1543       find_interesting_uses_op (data, *var_p);
1544       find_interesting_uses_op (data, *bound_p);
1545       return;
1546     }
1547
1548   record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1549 }
1550
1551 /* Returns the outermost loop EXPR is obviously invariant in
1552    relative to the loop LOOP, i.e. if all its operands are defined
1553    outside of the returned loop.  Returns NULL if EXPR is not
1554    even obviously invariant in LOOP.  */
1555
1556 struct loop *
1557 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1558 {
1559   basic_block def_bb;
1560   unsigned i, len;
1561
1562   if (is_gimple_min_invariant (expr))
1563     return current_loops->tree_root;
1564
1565   if (TREE_CODE (expr) == SSA_NAME)
1566     {
1567       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1568       if (def_bb)
1569         {
1570           if (flow_bb_inside_loop_p (loop, def_bb))
1571             return NULL;
1572           return superloop_at_depth (loop,
1573                                      loop_depth (def_bb->loop_father) + 1);
1574         }
1575
1576       return current_loops->tree_root;
1577     }
1578
1579   if (!EXPR_P (expr))
1580     return NULL;
1581
1582   unsigned maxdepth = 0;
1583   len = TREE_OPERAND_LENGTH (expr);
1584   for (i = 0; i < len; i++)
1585     {
1586       struct loop *ivloop;
1587       if (!TREE_OPERAND (expr, i))
1588         continue;
1589
1590       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1591       if (!ivloop)
1592         return NULL;
1593       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1594     }
1595
1596   return superloop_at_depth (loop, maxdepth);
1597 }
1598
1599 /* Returns true if expression EXPR is obviously invariant in LOOP,
1600    i.e. if all its operands are defined outside of the LOOP.  LOOP
1601    should not be the function body.  */
1602
1603 bool
1604 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1605 {
1606   basic_block def_bb;
1607   unsigned i, len;
1608
1609   gcc_assert (loop_depth (loop) > 0);
1610
1611   if (is_gimple_min_invariant (expr))
1612     return true;
1613
1614   if (TREE_CODE (expr) == SSA_NAME)
1615     {
1616       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1617       if (def_bb
1618           && flow_bb_inside_loop_p (loop, def_bb))
1619         return false;
1620
1621       return true;
1622     }
1623
1624   if (!EXPR_P (expr))
1625     return false;
1626
1627   len = TREE_OPERAND_LENGTH (expr);
1628   for (i = 0; i < len; i++)
1629     if (TREE_OPERAND (expr, i)
1630         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1631       return false;
1632
1633   return true;
1634 }
1635
1636 /* Given expression EXPR which computes inductive values with respect
1637    to loop recorded in DATA, this function returns biv from which EXPR
1638    is derived by tracing definition chains of ssa variables in EXPR.  */
1639
1640 static struct iv*
1641 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1642 {
1643   struct iv *iv;
1644   unsigned i, n;
1645   tree e2, e1;
1646   enum tree_code code;
1647   gimple *stmt;
1648
1649   if (expr == NULL_TREE)
1650     return NULL;
1651
1652   if (is_gimple_min_invariant (expr))
1653     return NULL;
1654
1655   code = TREE_CODE (expr);
1656   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1657     {
1658       n = TREE_OPERAND_LENGTH (expr);
1659       for (i = 0; i < n; i++)
1660         {
1661           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1662           if (iv)
1663             return iv;
1664         }
1665     }
1666
1667   /* Stop if it's not ssa name.  */
1668   if (code != SSA_NAME)
1669     return NULL;
1670
1671   iv = get_iv (data, expr);
1672   if (!iv || integer_zerop (iv->step))
1673     return NULL;
1674   else if (iv->biv_p)
1675     return iv;
1676
1677   stmt = SSA_NAME_DEF_STMT (expr);
1678   if (gphi *phi = dyn_cast <gphi *> (stmt))
1679     {
1680       ssa_op_iter iter;
1681       use_operand_p use_p;
1682
1683       if (virtual_operand_p (gimple_phi_result (phi)))
1684         return NULL;
1685
1686       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1687         {
1688           tree use = USE_FROM_PTR (use_p);
1689           iv = find_deriving_biv_for_expr (data, use);
1690           if (iv)
1691             return iv;
1692         }
1693       return NULL;
1694     }
1695   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1696     return NULL;
1697
1698   e1 = gimple_assign_rhs1 (stmt);
1699   code = gimple_assign_rhs_code (stmt);
1700   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1701     return find_deriving_biv_for_expr (data, e1);
1702
1703   switch (code)
1704     {
1705     case MULT_EXPR:
1706     case PLUS_EXPR:
1707     case MINUS_EXPR:
1708     case POINTER_PLUS_EXPR:
1709       /* Increments, decrements and multiplications by a constant
1710          are simple.  */
1711       e2 = gimple_assign_rhs2 (stmt);
1712       iv = find_deriving_biv_for_expr (data, e2);
1713       if (iv)
1714         return iv;
1715
1716       /* Fallthru.  */
1717     CASE_CONVERT:
1718       /* Casts are simple.  */
1719       return find_deriving_biv_for_expr (data, e1);
1720
1721     default:
1722       break;
1723     }
1724
1725   return NULL;
1726 }
1727
1728 /* Record BIV, its predecessor and successor that they are used in
1729    address type uses.  */
1730
1731 static void
1732 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1733 {
1734   unsigned i;
1735   tree type, base_1, base_2;
1736   bitmap_iterator bi;
1737
1738   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1739       || biv->have_address_use || !biv->no_overflow)
1740     return;
1741
1742   type = TREE_TYPE (biv->base);
1743   if (!INTEGRAL_TYPE_P (type))
1744     return;
1745
1746   biv->have_address_use = true;
1747   data->bivs_not_used_in_addr--;
1748   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1749   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1750     {
1751       struct iv *iv = ver_info (data, i)->iv;
1752
1753       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1754           || iv->have_address_use || !iv->no_overflow)
1755         continue;
1756
1757       if (type != TREE_TYPE (iv->base)
1758           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1759         continue;
1760
1761       if (!operand_equal_p (biv->step, iv->step, 0))
1762         continue;
1763
1764       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1765       if (operand_equal_p (base_1, iv->base, 0)
1766           || operand_equal_p (base_2, biv->base, 0))
1767         {
1768           iv->have_address_use = true;
1769           data->bivs_not_used_in_addr--;
1770         }
1771     }
1772 }
1773
1774 /* Cumulates the steps of indices into DATA and replaces their values with the
1775    initial ones.  Returns false when the value of the index cannot be determined.
1776    Callback for for_each_index.  */
1777
1778 struct ifs_ivopts_data
1779 {
1780   struct ivopts_data *ivopts_data;
1781   gimple *stmt;
1782   tree step;
1783 };
1784
1785 static bool
1786 idx_find_step (tree base, tree *idx, void *data)
1787 {
1788   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1789   struct iv *iv;
1790   bool use_overflow_semantics = false;
1791   tree step, iv_base, iv_step, lbound, off;
1792   struct loop *loop = dta->ivopts_data->current_loop;
1793
1794   /* If base is a component ref, require that the offset of the reference
1795      be invariant.  */
1796   if (TREE_CODE (base) == COMPONENT_REF)
1797     {
1798       off = component_ref_field_offset (base);
1799       return expr_invariant_in_loop_p (loop, off);
1800     }
1801
1802   /* If base is array, first check whether we will be able to move the
1803      reference out of the loop (in order to take its address in strength
1804      reduction).  In order for this to work we need both lower bound
1805      and step to be loop invariants.  */
1806   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1807     {
1808       /* Moreover, for a range, the size needs to be invariant as well.  */
1809       if (TREE_CODE (base) == ARRAY_RANGE_REF
1810           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1811         return false;
1812
1813       step = array_ref_element_size (base);
1814       lbound = array_ref_low_bound (base);
1815
1816       if (!expr_invariant_in_loop_p (loop, step)
1817           || !expr_invariant_in_loop_p (loop, lbound))
1818         return false;
1819     }
1820
1821   if (TREE_CODE (*idx) != SSA_NAME)
1822     return true;
1823
1824   iv = get_iv (dta->ivopts_data, *idx);
1825   if (!iv)
1826     return false;
1827
1828   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1829           *&x[0], which is not folded and does not trigger the
1830           ARRAY_REF path below.  */
1831   *idx = iv->base;
1832
1833   if (integer_zerop (iv->step))
1834     return true;
1835
1836   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1837     {
1838       step = array_ref_element_size (base);
1839
1840       /* We only handle addresses whose step is an integer constant.  */
1841       if (TREE_CODE (step) != INTEGER_CST)
1842         return false;
1843     }
1844   else
1845     /* The step for pointer arithmetics already is 1 byte.  */
1846     step = size_one_node;
1847
1848   iv_base = iv->base;
1849   iv_step = iv->step;
1850   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1851     use_overflow_semantics = true;
1852
1853   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1854                             sizetype, &iv_base, &iv_step, dta->stmt,
1855                             use_overflow_semantics))
1856     {
1857       /* The index might wrap.  */
1858       return false;
1859     }
1860
1861   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1862   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1863
1864   if (dta->ivopts_data->bivs_not_used_in_addr)
1865     {
1866       if (!iv->biv_p)
1867         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1868
1869       record_biv_for_address_use (dta->ivopts_data, iv);
1870     }
1871   return true;
1872 }
1873
1874 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1875    object is passed to it in DATA.  */
1876
1877 static bool
1878 idx_record_use (tree base, tree *idx,
1879                 void *vdata)
1880 {
1881   struct ivopts_data *data = (struct ivopts_data *) vdata;
1882   find_interesting_uses_op (data, *idx);
1883   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1884     {
1885       find_interesting_uses_op (data, array_ref_element_size (base));
1886       find_interesting_uses_op (data, array_ref_low_bound (base));
1887     }
1888   return true;
1889 }
1890
1891 /* If we can prove that TOP = cst * BOT for some constant cst,
1892    store cst to MUL and return true.  Otherwise return false.
1893    The returned value is always sign-extended, regardless of the
1894    signedness of TOP and BOT.  */
1895
1896 static bool
1897 constant_multiple_of (tree top, tree bot, widest_int *mul)
1898 {
1899   tree mby;
1900   enum tree_code code;
1901   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1902   widest_int res, p0, p1;
1903
1904   STRIP_NOPS (top);
1905   STRIP_NOPS (bot);
1906
1907   if (operand_equal_p (top, bot, 0))
1908     {
1909       *mul = 1;
1910       return true;
1911     }
1912
1913   code = TREE_CODE (top);
1914   switch (code)
1915     {
1916     case MULT_EXPR:
1917       mby = TREE_OPERAND (top, 1);
1918       if (TREE_CODE (mby) != INTEGER_CST)
1919         return false;
1920
1921       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1922         return false;
1923
1924       *mul = wi::sext (res * wi::to_widest (mby), precision);
1925       return true;
1926
1927     case PLUS_EXPR:
1928     case MINUS_EXPR:
1929       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1930           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1931         return false;
1932
1933       if (code == MINUS_EXPR)
1934         p1 = -p1;
1935       *mul = wi::sext (p0 + p1, precision);
1936       return true;
1937
1938     case INTEGER_CST:
1939       if (TREE_CODE (bot) != INTEGER_CST)
1940         return false;
1941
1942       p0 = widest_int::from (top, SIGNED);
1943       p1 = widest_int::from (bot, SIGNED);
1944       if (p1 == 0)
1945         return false;
1946       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1947       return res == 0;
1948
1949     default:
1950       return false;
1951     }
1952 }
1953
1954 /* Return true if memory reference REF with step STEP may be unaligned.  */
1955
1956 static bool
1957 may_be_unaligned_p (tree ref, tree step)
1958 {
1959   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1960      thus they are not misaligned.  */
1961   if (TREE_CODE (ref) == TARGET_MEM_REF)
1962     return false;
1963
1964   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1965   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1966     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1967
1968   unsigned HOST_WIDE_INT bitpos;
1969   unsigned int ref_align;
1970   get_object_alignment_1 (ref, &ref_align, &bitpos);
1971   if (ref_align < align
1972       || (bitpos % align) != 0
1973       || (bitpos % BITS_PER_UNIT) != 0)
1974     return true;
1975
1976   unsigned int trailing_zeros = tree_ctz (step);
1977   if (trailing_zeros < HOST_BITS_PER_INT
1978       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1979     return true;
1980
1981   return false;
1982 }
1983
1984 /* Return true if EXPR may be non-addressable.   */
1985
1986 bool
1987 may_be_nonaddressable_p (tree expr)
1988 {
1989   switch (TREE_CODE (expr))
1990     {
1991     case TARGET_MEM_REF:
1992       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1993          target, thus they are always addressable.  */
1994       return false;
1995
1996     case COMPONENT_REF:
1997       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1998              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1999
2000     case VIEW_CONVERT_EXPR:
2001       /* This kind of view-conversions may wrap non-addressable objects
2002          and make them look addressable.  After some processing the
2003          non-addressability may be uncovered again, causing ADDR_EXPRs
2004          of inappropriate objects to be built.  */
2005       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2006           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2007         return true;
2008
2009       /* ... fall through ... */
2010
2011     case ARRAY_REF:
2012     case ARRAY_RANGE_REF:
2013       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2014
2015     CASE_CONVERT:
2016       return true;
2017
2018     default:
2019       break;
2020     }
2021
2022   return false;
2023 }
2024
2025 static tree
2026 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2027
2028 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2029    If there is an existing use which has same stripped iv base and step,
2030    this function records this one as a sub use to that; otherwise records
2031    it as a normal one.  */
2032
2033 static struct iv_use *
2034 record_group_use (struct ivopts_data *data, tree *use_p,
2035                   struct iv *iv, gimple *stmt, enum use_type use_type)
2036 {
2037   unsigned int i;
2038   struct iv_use *use;
2039   tree addr_base;
2040   unsigned HOST_WIDE_INT addr_offset;
2041
2042   /* Only support sub use for address type uses, that is, with base
2043      object.  */
2044   if (!iv->base_object)
2045     return record_use (data, use_p, iv, stmt, use_type);
2046
2047   addr_base = strip_offset (iv->base, &addr_offset);
2048   for (i = 0; i < n_iv_uses (data); i++)
2049     {
2050       use = iv_use (data, i);
2051       if (use->type != USE_ADDRESS || !use->iv->base_object)
2052         continue;
2053
2054       /* Check if it has the same stripped base and step.  */
2055       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2056           && operand_equal_p (iv->step, use->iv->step, 0)
2057           && operand_equal_p (addr_base, use->addr_base, 0))
2058         break;
2059     }
2060
2061   if (i == n_iv_uses (data))
2062     return record_use (data, use_p, iv, stmt,
2063                        use_type, addr_base, addr_offset);
2064   else
2065     return record_sub_use (data, use_p, iv, stmt,
2066                            use_type, addr_base, addr_offset, i);
2067 }
2068
2069 /* Finds addresses in *OP_P inside STMT.  */
2070
2071 static void
2072 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2073                                tree *op_p)
2074 {
2075   tree base = *op_p, step = size_zero_node;
2076   struct iv *civ;
2077   struct ifs_ivopts_data ifs_ivopts_data;
2078
2079   /* Do not play with volatile memory references.  A bit too conservative,
2080      perhaps, but safe.  */
2081   if (gimple_has_volatile_ops (stmt))
2082     goto fail;
2083
2084   /* Ignore bitfields for now.  Not really something terribly complicated
2085      to handle.  TODO.  */
2086   if (TREE_CODE (base) == BIT_FIELD_REF)
2087     goto fail;
2088
2089   base = unshare_expr (base);
2090
2091   if (TREE_CODE (base) == TARGET_MEM_REF)
2092     {
2093       tree type = build_pointer_type (TREE_TYPE (base));
2094       tree astep;
2095
2096       if (TMR_BASE (base)
2097           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2098         {
2099           civ = get_iv (data, TMR_BASE (base));
2100           if (!civ)
2101             goto fail;
2102
2103           TMR_BASE (base) = civ->base;
2104           step = civ->step;
2105         }
2106       if (TMR_INDEX2 (base)
2107           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2108         {
2109           civ = get_iv (data, TMR_INDEX2 (base));
2110           if (!civ)
2111             goto fail;
2112
2113           TMR_INDEX2 (base) = civ->base;
2114           step = civ->step;
2115         }
2116       if (TMR_INDEX (base)
2117           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2118         {
2119           civ = get_iv (data, TMR_INDEX (base));
2120           if (!civ)
2121             goto fail;
2122
2123           TMR_INDEX (base) = civ->base;
2124           astep = civ->step;
2125
2126           if (astep)
2127             {
2128               if (TMR_STEP (base))
2129                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2130
2131               step = fold_build2 (PLUS_EXPR, type, step, astep);
2132             }
2133         }
2134
2135       if (integer_zerop (step))
2136         goto fail;
2137       base = tree_mem_ref_addr (type, base);
2138     }
2139   else
2140     {
2141       ifs_ivopts_data.ivopts_data = data;
2142       ifs_ivopts_data.stmt = stmt;
2143       ifs_ivopts_data.step = size_zero_node;
2144       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2145           || integer_zerop (ifs_ivopts_data.step))
2146         goto fail;
2147       step = ifs_ivopts_data.step;
2148
2149       /* Check that the base expression is addressable.  This needs
2150          to be done after substituting bases of IVs into it.  */
2151       if (may_be_nonaddressable_p (base))
2152         goto fail;
2153
2154       /* Moreover, on strict alignment platforms, check that it is
2155          sufficiently aligned.  */
2156       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2157         goto fail;
2158
2159       base = build_fold_addr_expr (base);
2160
2161       /* Substituting bases of IVs into the base expression might
2162          have caused folding opportunities.  */
2163       if (TREE_CODE (base) == ADDR_EXPR)
2164         {
2165           tree *ref = &TREE_OPERAND (base, 0);
2166           while (handled_component_p (*ref))
2167             ref = &TREE_OPERAND (*ref, 0);
2168           if (TREE_CODE (*ref) == MEM_REF)
2169             {
2170               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2171                                       TREE_OPERAND (*ref, 0),
2172                                       TREE_OPERAND (*ref, 1));
2173               if (tem)
2174                 *ref = tem;
2175             }
2176         }
2177     }
2178
2179   civ = alloc_iv (data, base, step);
2180   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2181   return;
2182
2183 fail:
2184   for_each_index (op_p, idx_record_use, data);
2185 }
2186
2187 /* Finds and records invariants used in STMT.  */
2188
2189 static void
2190 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2191 {
2192   ssa_op_iter iter;
2193   use_operand_p use_p;
2194   tree op;
2195
2196   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2197     {
2198       op = USE_FROM_PTR (use_p);
2199       record_invariant (data, op, false);
2200     }
2201 }
2202
2203 /* Finds interesting uses of induction variables in the statement STMT.  */
2204
2205 static void
2206 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2207 {
2208   struct iv *iv;
2209   tree op, *lhs, *rhs;
2210   ssa_op_iter iter;
2211   use_operand_p use_p;
2212   enum tree_code code;
2213
2214   find_invariants_stmt (data, stmt);
2215
2216   if (gimple_code (stmt) == GIMPLE_COND)
2217     {
2218       find_interesting_uses_cond (data, stmt);
2219       return;
2220     }
2221
2222   if (is_gimple_assign (stmt))
2223     {
2224       lhs = gimple_assign_lhs_ptr (stmt);
2225       rhs = gimple_assign_rhs1_ptr (stmt);
2226
2227       if (TREE_CODE (*lhs) == SSA_NAME)
2228         {
2229           /* If the statement defines an induction variable, the uses are not
2230              interesting by themselves.  */
2231
2232           iv = get_iv (data, *lhs);
2233
2234           if (iv && !integer_zerop (iv->step))
2235             return;
2236         }
2237
2238       code = gimple_assign_rhs_code (stmt);
2239       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2240           && (REFERENCE_CLASS_P (*rhs)
2241               || is_gimple_val (*rhs)))
2242         {
2243           if (REFERENCE_CLASS_P (*rhs))
2244             find_interesting_uses_address (data, stmt, rhs);
2245           else
2246             find_interesting_uses_op (data, *rhs);
2247
2248           if (REFERENCE_CLASS_P (*lhs))
2249             find_interesting_uses_address (data, stmt, lhs);
2250           return;
2251         }
2252       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2253         {
2254           find_interesting_uses_cond (data, stmt);
2255           return;
2256         }
2257
2258       /* TODO -- we should also handle address uses of type
2259
2260          memory = call (whatever);
2261
2262          and
2263
2264          call (memory).  */
2265     }
2266
2267   if (gimple_code (stmt) == GIMPLE_PHI
2268       && gimple_bb (stmt) == data->current_loop->header)
2269     {
2270       iv = get_iv (data, PHI_RESULT (stmt));
2271
2272       if (iv && !integer_zerop (iv->step))
2273         return;
2274     }
2275
2276   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2277     {
2278       op = USE_FROM_PTR (use_p);
2279
2280       if (TREE_CODE (op) != SSA_NAME)
2281         continue;
2282
2283       iv = get_iv (data, op);
2284       if (!iv)
2285         continue;
2286
2287       find_interesting_uses_op (data, op);
2288     }
2289 }
2290
2291 /* Finds interesting uses of induction variables outside of loops
2292    on loop exit edge EXIT.  */
2293
2294 static void
2295 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2296 {
2297   gphi *phi;
2298   gphi_iterator psi;
2299   tree def;
2300
2301   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2302     {
2303       phi = psi.phi ();
2304       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2305       if (!virtual_operand_p (def))
2306         find_interesting_uses_op (data, def);
2307     }
2308 }
2309
2310 /* Finds uses of the induction variables that are interesting.  */
2311
2312 static void
2313 find_interesting_uses (struct ivopts_data *data)
2314 {
2315   basic_block bb;
2316   gimple_stmt_iterator bsi;
2317   basic_block *body = get_loop_body (data->current_loop);
2318   unsigned i;
2319   struct version_info *info;
2320   edge e;
2321
2322   if (dump_file && (dump_flags & TDF_DETAILS))
2323     fprintf (dump_file, "Uses:\n\n");
2324
2325   for (i = 0; i < data->current_loop->num_nodes; i++)
2326     {
2327       edge_iterator ei;
2328       bb = body[i];
2329
2330       FOR_EACH_EDGE (e, ei, bb->succs)
2331         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2332             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2333           find_interesting_uses_outside (data, e);
2334
2335       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2336         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2337       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2338         if (!is_gimple_debug (gsi_stmt (bsi)))
2339           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2340     }
2341
2342   if (dump_file && (dump_flags & TDF_DETAILS))
2343     {
2344       bitmap_iterator bi;
2345
2346       fprintf (dump_file, "\n");
2347
2348       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2349         {
2350           info = ver_info (data, i);
2351           if (info->inv_id)
2352             {
2353               fprintf (dump_file, "  ");
2354               print_generic_expr (dump_file, info->name, TDF_SLIM);
2355               fprintf (dump_file, " is invariant (%d)%s\n",
2356                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2357             }
2358         }
2359
2360       fprintf (dump_file, "\n");
2361     }
2362
2363   free (body);
2364 }
2365
2366 /* Compute maximum offset of [base + offset] addressing mode
2367    for memory reference represented by USE.  */
2368
2369 static HOST_WIDE_INT
2370 compute_max_addr_offset (struct iv_use *use)
2371 {
2372   int width;
2373   rtx reg, addr;
2374   HOST_WIDE_INT i, off;
2375   unsigned list_index, num;
2376   addr_space_t as;
2377   machine_mode mem_mode, addr_mode;
2378   static vec<HOST_WIDE_INT> max_offset_list;
2379
2380   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2381   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2382
2383   num = max_offset_list.length ();
2384   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2385   if (list_index >= num)
2386     {
2387       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2388       for (; num < max_offset_list.length (); num++)
2389         max_offset_list[num] = -1;
2390     }
2391
2392   off = max_offset_list[list_index];
2393   if (off != -1)
2394     return off;
2395
2396   addr_mode = targetm.addr_space.address_mode (as);
2397   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2398   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2399
2400   width = GET_MODE_BITSIZE (addr_mode) - 1;
2401   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2402     width = HOST_BITS_PER_WIDE_INT - 1;
2403
2404   for (i = width; i > 0; i--)
2405     {
2406       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2407       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2408       if (memory_address_addr_space_p (mem_mode, addr, as))
2409         break;
2410
2411       /* For some strict-alignment targets, the offset must be naturally
2412          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2413       off = ((unsigned HOST_WIDE_INT) 1 << i);
2414       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2415         {
2416           off -= GET_MODE_SIZE (mem_mode);
2417           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2418           if (memory_address_addr_space_p (mem_mode, addr, as))
2419             break;
2420         }
2421     }
2422   if (i == 0)
2423     off = 0;
2424
2425   max_offset_list[list_index] = off;
2426   return off;
2427 }
2428
2429 /* Check if all small groups should be split.  Return true if and
2430    only if:
2431
2432      1) At least one groups contain two uses with different offsets.
2433      2) No group contains more than two uses with different offsets.
2434
2435    Return false otherwise.  We want to split such groups because:
2436
2437      1) Small groups don't have much benefit and may interfer with
2438         general candidate selection.
2439      2) Size for problem with only small groups is usually small and
2440         general algorithm can handle it well.
2441
2442    TODO -- Above claim may not hold when auto increment is supported.  */
2443
2444 static bool
2445 split_all_small_groups (struct ivopts_data *data)
2446 {
2447   bool split_p = false;
2448   unsigned int i, n, distinct;
2449   struct iv_use *pre, *use;
2450
2451   n = n_iv_uses (data);
2452   for (i = 0; i < n; i++)
2453     {
2454       use = iv_use (data, i);
2455       if (!use->next)
2456         continue;
2457
2458       distinct = 1;
2459       gcc_assert (use->type == USE_ADDRESS);
2460       for (pre = use, use = use->next; use; pre = use, use = use->next)
2461         {
2462           if (pre->addr_offset != use->addr_offset)
2463             distinct++;
2464
2465           if (distinct > 2)
2466             return false;
2467         }
2468       if (distinct == 2)
2469         split_p = true;
2470     }
2471
2472   return split_p;
2473 }
2474
2475 /* For each group of address type uses, this function further groups
2476    these uses according to the maximum offset supported by target's
2477    [base + offset] addressing mode.  */
2478
2479 static void
2480 group_address_uses (struct ivopts_data *data)
2481 {
2482   HOST_WIDE_INT max_offset = -1;
2483   unsigned int i, n, sub_id;
2484   struct iv_use *pre, *use;
2485   unsigned HOST_WIDE_INT addr_offset_first;
2486
2487   /* Reset max offset to split all small groups.  */
2488   if (split_all_small_groups (data))
2489     max_offset = 0;
2490
2491   n = n_iv_uses (data);
2492   for (i = 0; i < n; i++)
2493     {
2494       use = iv_use (data, i);
2495       if (!use->next)
2496         continue;
2497
2498       gcc_assert (use->type == USE_ADDRESS);
2499       if (max_offset != 0)
2500         max_offset = compute_max_addr_offset (use);
2501
2502       while (use)
2503         {
2504           sub_id = 0;
2505           addr_offset_first = use->addr_offset;
2506           /* Only uses with offset that can fit in offset part against
2507              the first use can be grouped together.  */
2508           for (pre = use, use = use->next;
2509                use && (use->addr_offset - addr_offset_first
2510                        <= (unsigned HOST_WIDE_INT) max_offset);
2511                pre = use, use = use->next)
2512             {
2513               use->id = pre->id;
2514               use->sub_id = ++sub_id;
2515             }
2516
2517           /* Break the list and create new group.  */
2518           if (use)
2519             {
2520               pre->next = NULL;
2521               use->id = n_iv_uses (data);
2522               use->related_cands = BITMAP_ALLOC (NULL);
2523               data->iv_uses.safe_push (use);
2524             }
2525         }
2526     }
2527
2528   if (dump_file && (dump_flags & TDF_DETAILS))
2529     dump_uses (dump_file, data);
2530 }
2531
2532 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2533    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2534    we are at the top-level of the processed address.  */
2535
2536 static tree
2537 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2538                 HOST_WIDE_INT *offset)
2539 {
2540   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2541   enum tree_code code;
2542   tree type, orig_type = TREE_TYPE (expr);
2543   HOST_WIDE_INT off0, off1, st;
2544   tree orig_expr = expr;
2545
2546   STRIP_NOPS (expr);
2547
2548   type = TREE_TYPE (expr);
2549   code = TREE_CODE (expr);
2550   *offset = 0;
2551
2552   switch (code)
2553     {
2554     case INTEGER_CST:
2555       if (!cst_and_fits_in_hwi (expr)
2556           || integer_zerop (expr))
2557         return orig_expr;
2558
2559       *offset = int_cst_value (expr);
2560       return build_int_cst (orig_type, 0);
2561
2562     case POINTER_PLUS_EXPR:
2563     case PLUS_EXPR:
2564     case MINUS_EXPR:
2565       op0 = TREE_OPERAND (expr, 0);
2566       op1 = TREE_OPERAND (expr, 1);
2567
2568       op0 = strip_offset_1 (op0, false, false, &off0);
2569       op1 = strip_offset_1 (op1, false, false, &off1);
2570
2571       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2572       if (op0 == TREE_OPERAND (expr, 0)
2573           && op1 == TREE_OPERAND (expr, 1))
2574         return orig_expr;
2575
2576       if (integer_zerop (op1))
2577         expr = op0;
2578       else if (integer_zerop (op0))
2579         {
2580           if (code == MINUS_EXPR)
2581             expr = fold_build1 (NEGATE_EXPR, type, op1);
2582           else
2583             expr = op1;
2584         }
2585       else
2586         expr = fold_build2 (code, type, op0, op1);
2587
2588       return fold_convert (orig_type, expr);
2589
2590     case MULT_EXPR:
2591       op1 = TREE_OPERAND (expr, 1);
2592       if (!cst_and_fits_in_hwi (op1))
2593         return orig_expr;
2594
2595       op0 = TREE_OPERAND (expr, 0);
2596       op0 = strip_offset_1 (op0, false, false, &off0);
2597       if (op0 == TREE_OPERAND (expr, 0))
2598         return orig_expr;
2599
2600       *offset = off0 * int_cst_value (op1);
2601       if (integer_zerop (op0))
2602         expr = op0;
2603       else
2604         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2605
2606       return fold_convert (orig_type, expr);
2607
2608     case ARRAY_REF:
2609     case ARRAY_RANGE_REF:
2610       if (!inside_addr)
2611         return orig_expr;
2612
2613       step = array_ref_element_size (expr);
2614       if (!cst_and_fits_in_hwi (step))
2615         break;
2616
2617       st = int_cst_value (step);
2618       op1 = TREE_OPERAND (expr, 1);
2619       op1 = strip_offset_1 (op1, false, false, &off1);
2620       *offset = off1 * st;
2621
2622       if (top_compref
2623           && integer_zerop (op1))
2624         {
2625           /* Strip the component reference completely.  */
2626           op0 = TREE_OPERAND (expr, 0);
2627           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2628           *offset += off0;
2629           return op0;
2630         }
2631       break;
2632
2633     case COMPONENT_REF:
2634       {
2635         tree field;
2636
2637         if (!inside_addr)
2638           return orig_expr;
2639
2640         tmp = component_ref_field_offset (expr);
2641         field = TREE_OPERAND (expr, 1);
2642         if (top_compref
2643             && cst_and_fits_in_hwi (tmp)
2644             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2645           {
2646             HOST_WIDE_INT boffset, abs_off;
2647
2648             /* Strip the component reference completely.  */
2649             op0 = TREE_OPERAND (expr, 0);
2650             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2651             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2652             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2653             if (boffset < 0)
2654               abs_off = -abs_off;
2655
2656             *offset = off0 + int_cst_value (tmp) + abs_off;
2657             return op0;
2658           }
2659       }
2660       break;
2661
2662     case ADDR_EXPR:
2663       op0 = TREE_OPERAND (expr, 0);
2664       op0 = strip_offset_1 (op0, true, true, &off0);
2665       *offset += off0;
2666
2667       if (op0 == TREE_OPERAND (expr, 0))
2668         return orig_expr;
2669
2670       expr = build_fold_addr_expr (op0);
2671       return fold_convert (orig_type, expr);
2672
2673     case MEM_REF:
2674       /* ???  Offset operand?  */
2675       inside_addr = false;
2676       break;
2677
2678     default:
2679       return orig_expr;
2680     }
2681
2682   /* Default handling of expressions for that we want to recurse into
2683      the first operand.  */
2684   op0 = TREE_OPERAND (expr, 0);
2685   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2686   *offset += off0;
2687
2688   if (op0 == TREE_OPERAND (expr, 0)
2689       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2690     return orig_expr;
2691
2692   expr = copy_node (expr);
2693   TREE_OPERAND (expr, 0) = op0;
2694   if (op1)
2695     TREE_OPERAND (expr, 1) = op1;
2696
2697   /* Inside address, we might strip the top level component references,
2698      thus changing type of the expression.  Handling of ADDR_EXPR
2699      will fix that.  */
2700   expr = fold_convert (orig_type, expr);
2701
2702   return expr;
2703 }
2704
2705 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2706
2707 static tree
2708 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2709 {
2710   HOST_WIDE_INT off;
2711   tree core = strip_offset_1 (expr, false, false, &off);
2712   *offset = off;
2713   return core;
2714 }
2715
2716 /* Returns variant of TYPE that can be used as base for different uses.
2717    We return unsigned type with the same precision, which avoids problems
2718    with overflows.  */
2719
2720 static tree
2721 generic_type_for (tree type)
2722 {
2723   if (POINTER_TYPE_P (type))
2724     return unsigned_type_for (type);
2725
2726   if (TYPE_UNSIGNED (type))
2727     return type;
2728
2729   return unsigned_type_for (type);
2730 }
2731
2732 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2733    the bitmap to that we should store it.  */
2734
2735 static struct ivopts_data *fd_ivopts_data;
2736 static tree
2737 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2738 {
2739   bitmap *depends_on = (bitmap *) data;
2740   struct version_info *info;
2741
2742   if (TREE_CODE (*expr_p) != SSA_NAME)
2743     return NULL_TREE;
2744   info = name_info (fd_ivopts_data, *expr_p);
2745
2746   if (!info->inv_id || info->has_nonlin_use)
2747     return NULL_TREE;
2748
2749   if (!*depends_on)
2750     *depends_on = BITMAP_ALLOC (NULL);
2751   bitmap_set_bit (*depends_on, info->inv_id);
2752
2753   return NULL_TREE;
2754 }
2755
2756 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2757    position to POS.  If USE is not NULL, the candidate is set as related to
2758    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2759    replacement of the final value of the iv by a direct computation.  */
2760
2761 static struct iv_cand *
2762 add_candidate_1 (struct ivopts_data *data,
2763                  tree base, tree step, bool important, enum iv_position pos,
2764                  struct iv_use *use, gimple *incremented_at,
2765                  struct iv *orig_iv = NULL)
2766 {
2767   unsigned i;
2768   struct iv_cand *cand = NULL;
2769   tree type, orig_type;
2770
2771   /* For non-original variables, make sure their values are computed in a type
2772      that does not invoke undefined behavior on overflows (since in general,
2773      we cannot prove that these induction variables are non-wrapping).  */
2774   if (pos != IP_ORIGINAL)
2775     {
2776       orig_type = TREE_TYPE (base);
2777       type = generic_type_for (orig_type);
2778       if (type != orig_type)
2779         {
2780           base = fold_convert (type, base);
2781           step = fold_convert (type, step);
2782         }
2783     }
2784
2785   for (i = 0; i < n_iv_cands (data); i++)
2786     {
2787       cand = iv_cand (data, i);
2788
2789       if (cand->pos != pos)
2790         continue;
2791
2792       if (cand->incremented_at != incremented_at
2793           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2794               && cand->ainc_use != use))
2795         continue;
2796
2797       if (!cand->iv)
2798         {
2799           if (!base && !step)
2800             break;
2801
2802           continue;
2803         }
2804
2805       if (!base && !step)
2806         continue;
2807
2808       if (operand_equal_p (base, cand->iv->base, 0)
2809           && operand_equal_p (step, cand->iv->step, 0)
2810           && (TYPE_PRECISION (TREE_TYPE (base))
2811               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2812         break;
2813     }
2814
2815   if (i == n_iv_cands (data))
2816     {
2817       cand = XCNEW (struct iv_cand);
2818       cand->id = i;
2819
2820       if (!base && !step)
2821         cand->iv = NULL;
2822       else
2823         cand->iv = alloc_iv (data, base, step);
2824
2825       cand->pos = pos;
2826       if (pos != IP_ORIGINAL && cand->iv)
2827         {
2828           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2829           cand->var_after = cand->var_before;
2830         }
2831       cand->important = important;
2832       cand->incremented_at = incremented_at;
2833       data->iv_candidates.safe_push (cand);
2834
2835       if (step
2836           && TREE_CODE (step) != INTEGER_CST)
2837         {
2838           fd_ivopts_data = data;
2839           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2840         }
2841
2842       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2843         cand->ainc_use = use;
2844       else
2845         cand->ainc_use = NULL;
2846
2847       cand->orig_iv = orig_iv;
2848       if (dump_file && (dump_flags & TDF_DETAILS))
2849         dump_cand (dump_file, cand);
2850     }
2851
2852   if (important && !cand->important)
2853     {
2854       cand->important = true;
2855       if (dump_file && (dump_flags & TDF_DETAILS))
2856         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2857     }
2858
2859   if (use)
2860     {
2861       bitmap_set_bit (use->related_cands, i);
2862       if (dump_file && (dump_flags & TDF_DETAILS))
2863         fprintf (dump_file, "Candidate %d is related to use %d\n",
2864                  cand->id, use->id);
2865     }
2866
2867   return cand;
2868 }
2869
2870 /* Returns true if incrementing the induction variable at the end of the LOOP
2871    is allowed.
2872
2873    The purpose is to avoid splitting latch edge with a biv increment, thus
2874    creating a jump, possibly confusing other optimization passes and leaving
2875    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2876    is not available (so we do not have a better alternative), or if the latch
2877    edge is already nonempty.  */
2878
2879 static bool
2880 allow_ip_end_pos_p (struct loop *loop)
2881 {
2882   if (!ip_normal_pos (loop))
2883     return true;
2884
2885   if (!empty_block_p (ip_end_pos (loop)))
2886     return true;
2887
2888   return false;
2889 }
2890
2891 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2892    Important field is set to IMPORTANT.  */
2893
2894 static void
2895 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2896                         bool important, struct iv_use *use)
2897 {
2898   basic_block use_bb = gimple_bb (use->stmt);
2899   machine_mode mem_mode;
2900   unsigned HOST_WIDE_INT cstepi;
2901
2902   /* If we insert the increment in any position other than the standard
2903      ones, we must ensure that it is incremented once per iteration.
2904      It must not be in an inner nested loop, or one side of an if
2905      statement.  */
2906   if (use_bb->loop_father != data->current_loop
2907       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2908       || stmt_could_throw_p (use->stmt)
2909       || !cst_and_fits_in_hwi (step))
2910     return;
2911
2912   cstepi = int_cst_value (step);
2913
2914   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2915   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2916         || USE_STORE_PRE_INCREMENT (mem_mode))
2917        && GET_MODE_SIZE (mem_mode) == cstepi)
2918       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2919            || USE_STORE_PRE_DECREMENT (mem_mode))
2920           && GET_MODE_SIZE (mem_mode) == -cstepi))
2921     {
2922       enum tree_code code = MINUS_EXPR;
2923       tree new_base;
2924       tree new_step = step;
2925
2926       if (POINTER_TYPE_P (TREE_TYPE (base)))
2927         {
2928           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2929           code = POINTER_PLUS_EXPR;
2930         }
2931       else
2932         new_step = fold_convert (TREE_TYPE (base), new_step);
2933       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2934       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2935                        use->stmt);
2936     }
2937   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2938         || USE_STORE_POST_INCREMENT (mem_mode))
2939        && GET_MODE_SIZE (mem_mode) == cstepi)
2940       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2941            || USE_STORE_POST_DECREMENT (mem_mode))
2942           && GET_MODE_SIZE (mem_mode) == -cstepi))
2943     {
2944       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2945                        use->stmt);
2946     }
2947 }
2948
2949 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2950    position to POS.  If USE is not NULL, the candidate is set as related to
2951    it.  The candidate computation is scheduled before exit condition and at
2952    the end of loop.  */
2953
2954 static void
2955 add_candidate (struct ivopts_data *data,
2956                tree base, tree step, bool important, struct iv_use *use,
2957                struct iv *orig_iv = NULL)
2958 {
2959   gcc_assert (use == NULL || use->sub_id == 0);
2960
2961   if (ip_normal_pos (data->current_loop))
2962     add_candidate_1 (data, base, step, important,
2963                      IP_NORMAL, use, NULL, orig_iv);
2964   if (ip_end_pos (data->current_loop)
2965       && allow_ip_end_pos_p (data->current_loop))
2966     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2967 }
2968
2969 /* Adds standard iv candidates.  */
2970
2971 static void
2972 add_standard_iv_candidates (struct ivopts_data *data)
2973 {
2974   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2975
2976   /* The same for a double-integer type if it is still fast enough.  */
2977   if (TYPE_PRECISION
2978         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2979       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2980     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2981                    build_int_cst (long_integer_type_node, 1), true, NULL);
2982
2983   /* The same for a double-integer type if it is still fast enough.  */
2984   if (TYPE_PRECISION
2985         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2986       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2987     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2988                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2989 }
2990
2991
2992 /* Adds candidates bases on the old induction variable IV.  */
2993
2994 static void
2995 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2996 {
2997   gimple *phi;
2998   tree def;
2999   struct iv_cand *cand;
3000
3001   /* Check if this biv is used in address type use.  */
3002   if (iv->no_overflow  && iv->have_address_use
3003       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3004       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3005     {
3006       tree base = fold_convert (sizetype, iv->base);
3007       tree step = fold_convert (sizetype, iv->step);
3008
3009       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3010       add_candidate (data, base, step, true, NULL, iv);
3011       /* Add iv cand of the original type only if it has nonlinear use.  */
3012       if (iv->have_use_for)
3013         add_candidate (data, iv->base, iv->step, true, NULL);
3014     }
3015   else
3016     add_candidate (data, iv->base, iv->step, true, NULL);
3017
3018   /* The same, but with initial value zero.  */
3019   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3020     add_candidate (data, size_int (0), iv->step, true, NULL);
3021   else
3022     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3023                    iv->step, true, NULL);
3024
3025   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3026   if (gimple_code (phi) == GIMPLE_PHI)
3027     {
3028       /* Additionally record the possibility of leaving the original iv
3029          untouched.  */
3030       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3031       /* Don't add candidate if it's from another PHI node because
3032          it's an affine iv appearing in the form of PEELED_CHREC.  */
3033       phi = SSA_NAME_DEF_STMT (def);
3034       if (gimple_code (phi) != GIMPLE_PHI)
3035         {
3036           cand = add_candidate_1 (data,
3037                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3038                                   SSA_NAME_DEF_STMT (def));
3039           cand->var_before = iv->ssa_name;
3040           cand->var_after = def;
3041         }
3042       else
3043         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3044     }
3045 }
3046
3047 /* Adds candidates based on the old induction variables.  */
3048
3049 static void
3050 add_iv_candidate_for_bivs (struct ivopts_data *data)
3051 {
3052   unsigned i;
3053   struct iv *iv;
3054   bitmap_iterator bi;
3055
3056   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3057     {
3058       iv = ver_info (data, i)->iv;
3059       if (iv && iv->biv_p && !integer_zerop (iv->step))
3060         add_iv_candidate_for_biv (data, iv);
3061     }
3062 }
3063
3064 /* Adds candidates based on the value of USE's iv.  */
3065
3066 static void
3067 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3068 {
3069   unsigned HOST_WIDE_INT offset;
3070   tree base;
3071   tree basetype;
3072   struct iv *iv = use->iv;
3073
3074   add_candidate (data, iv->base, iv->step, false, use);
3075
3076   /* The same, but with initial value zero.  Make such variable important,
3077      since it is generic enough so that possibly many uses may be based
3078      on it.  */
3079   basetype = TREE_TYPE (iv->base);
3080   if (POINTER_TYPE_P (basetype))
3081     basetype = sizetype;
3082   add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3083
3084   /* Third, try removing the constant offset.  Make sure to even
3085      add a candidate for &a[0] vs. (T *)&a.  */
3086   base = strip_offset (iv->base, &offset);
3087   if (offset || base != iv->base)
3088     add_candidate (data, base, iv->step, false, use);
3089
3090   /* At last, add auto-incremental candidates.  Make such variables
3091      important since other iv uses with same base object may be based
3092      on it.  */
3093   if (use != NULL && use->type == USE_ADDRESS)
3094     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3095 }
3096
3097 /* Adds candidates based on the uses.  */
3098
3099 static void
3100 add_iv_candidate_for_uses (struct ivopts_data *data)
3101 {
3102   unsigned i;
3103
3104   for (i = 0; i < n_iv_uses (data); i++)
3105     {
3106       struct iv_use *use = iv_use (data, i);
3107
3108       if (!use)
3109         continue;
3110
3111       switch (use->type)
3112         {
3113         case USE_NONLINEAR_EXPR:
3114         case USE_COMPARE:
3115         case USE_ADDRESS:
3116           /* Just add the ivs based on the value of the iv used here.  */
3117           add_iv_candidate_for_use (data, use);
3118           break;
3119
3120         default:
3121           gcc_unreachable ();
3122         }
3123     }
3124 }
3125
3126 /* Record important candidates and add them to related_cands bitmaps
3127    if needed.  */
3128
3129 static void
3130 record_important_candidates (struct ivopts_data *data)
3131 {
3132   unsigned i;
3133   struct iv_use *use;
3134
3135   for (i = 0; i < n_iv_cands (data); i++)
3136     {
3137       struct iv_cand *cand = iv_cand (data, i);
3138
3139       if (cand->important)
3140         bitmap_set_bit (data->important_candidates, i);
3141     }
3142
3143   data->consider_all_candidates = (n_iv_cands (data)
3144                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3145
3146   if (data->consider_all_candidates)
3147     {
3148       /* We will not need "related_cands" bitmaps in this case,
3149          so release them to decrease peak memory consumption.  */
3150       for (i = 0; i < n_iv_uses (data); i++)
3151         {
3152           use = iv_use (data, i);
3153           BITMAP_FREE (use->related_cands);
3154         }
3155     }
3156   else
3157     {
3158       /* Add important candidates to the related_cands bitmaps.  */
3159       for (i = 0; i < n_iv_uses (data); i++)
3160         bitmap_ior_into (iv_use (data, i)->related_cands,
3161                          data->important_candidates);
3162     }
3163 }
3164
3165 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3166    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3167    we allocate a simple list to every use.  */
3168
3169 static void
3170 alloc_use_cost_map (struct ivopts_data *data)
3171 {
3172   unsigned i, size, s;
3173
3174   for (i = 0; i < n_iv_uses (data); i++)
3175     {
3176       struct iv_use *use = iv_use (data, i);
3177
3178       if (data->consider_all_candidates)
3179         size = n_iv_cands (data);
3180       else
3181         {
3182           s = bitmap_count_bits (use->related_cands);
3183
3184           /* Round up to the power of two, so that moduling by it is fast.  */
3185           size = s ? (1 << ceil_log2 (s)) : 1;
3186         }
3187
3188       use->n_map_members = size;
3189       use->cost_map = XCNEWVEC (struct cost_pair, size);
3190     }
3191 }
3192
3193 /* Returns description of computation cost of expression whose runtime
3194    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3195
3196 static comp_cost
3197 new_cost (unsigned runtime, unsigned complexity)
3198 {
3199   comp_cost cost;
3200
3201   cost.cost = runtime;
3202   cost.complexity = complexity;
3203
3204   return cost;
3205 }
3206
3207 /* Returns true if COST is infinite.  */
3208
3209 static bool
3210 infinite_cost_p (comp_cost cost)
3211 {
3212   return cost.cost == INFTY;
3213 }
3214
3215 /* Adds costs COST1 and COST2.  */
3216
3217 static comp_cost
3218 add_costs (comp_cost cost1, comp_cost cost2)
3219 {
3220   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3221     return infinite_cost;
3222
3223   cost1.cost += cost2.cost;
3224   cost1.complexity += cost2.complexity;
3225
3226   return cost1;
3227 }
3228 /* Subtracts costs COST1 and COST2.  */
3229
3230 static comp_cost
3231 sub_costs (comp_cost cost1, comp_cost cost2)
3232 {
3233   cost1.cost -= cost2.cost;
3234   cost1.complexity -= cost2.complexity;
3235
3236   return cost1;
3237 }
3238
3239 /* Returns a negative number if COST1 < COST2, a positive number if
3240    COST1 > COST2, and 0 if COST1 = COST2.  */
3241
3242 static int
3243 compare_costs (comp_cost cost1, comp_cost cost2)
3244 {
3245   if (cost1.cost == cost2.cost)
3246     return cost1.complexity - cost2.complexity;
3247
3248   return cost1.cost - cost2.cost;
3249 }
3250
3251 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3252    on invariants DEPENDS_ON and that the value used in expressing it
3253    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3254
3255 static void
3256 set_use_iv_cost (struct ivopts_data *data,
3257                  struct iv_use *use, struct iv_cand *cand,
3258                  comp_cost cost, bitmap depends_on, tree value,
3259                  enum tree_code comp, int inv_expr_id)
3260 {
3261   unsigned i, s;
3262
3263   if (infinite_cost_p (cost))
3264     {
3265       BITMAP_FREE (depends_on);
3266       return;
3267     }
3268
3269   if (data->consider_all_candidates)
3270     {
3271       use->cost_map[cand->id].cand = cand;
3272       use->cost_map[cand->id].cost = cost;
3273       use->cost_map[cand->id].depends_on = depends_on;
3274       use->cost_map[cand->id].value = value;
3275       use->cost_map[cand->id].comp = comp;
3276       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3277       return;
3278     }
3279
3280   /* n_map_members is a power of two, so this computes modulo.  */
3281   s = cand->id & (use->n_map_members - 1);
3282   for (i = s; i < use->n_map_members; i++)
3283     if (!use->cost_map[i].cand)
3284       goto found;
3285   for (i = 0; i < s; i++)
3286     if (!use->cost_map[i].cand)
3287       goto found;
3288
3289   gcc_unreachable ();
3290
3291 found:
3292   use->cost_map[i].cand = cand;
3293   use->cost_map[i].cost = cost;
3294   use->cost_map[i].depends_on = depends_on;
3295   use->cost_map[i].value = value;
3296   use->cost_map[i].comp = comp;
3297   use->cost_map[i].inv_expr_id = inv_expr_id;
3298 }
3299
3300 /* Gets cost of (USE, CANDIDATE) pair.  */
3301
3302 static struct cost_pair *
3303 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3304                  struct iv_cand *cand)
3305 {
3306   unsigned i, s;
3307   struct cost_pair *ret;
3308
3309   if (!cand)
3310     return NULL;
3311
3312   if (data->consider_all_candidates)
3313     {
3314       ret = use->cost_map + cand->id;
3315       if (!ret->cand)
3316         return NULL;
3317
3318       return ret;
3319     }
3320
3321   /* n_map_members is a power of two, so this computes modulo.  */
3322   s = cand->id & (use->n_map_members - 1);
3323   for (i = s; i < use->n_map_members; i++)
3324     if (use->cost_map[i].cand == cand)
3325       return use->cost_map + i;
3326     else if (use->cost_map[i].cand == NULL)
3327       return NULL;
3328   for (i = 0; i < s; i++)
3329     if (use->cost_map[i].cand == cand)
3330       return use->cost_map + i;
3331     else if (use->cost_map[i].cand == NULL)
3332       return NULL;
3333
3334   return NULL;
3335 }
3336
3337 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3338 static rtx
3339 produce_memory_decl_rtl (tree obj, int *regno)
3340 {
3341   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3342   machine_mode address_mode = targetm.addr_space.address_mode (as);
3343   rtx x;
3344
3345   gcc_assert (obj);
3346   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3347     {
3348       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3349       x = gen_rtx_SYMBOL_REF (address_mode, name);
3350       SET_SYMBOL_REF_DECL (x, obj);
3351       x = gen_rtx_MEM (DECL_MODE (obj), x);
3352       set_mem_addr_space (x, as);
3353       targetm.encode_section_info (obj, x, true);
3354     }
3355   else
3356     {
3357       x = gen_raw_REG (address_mode, (*regno)++);
3358       x = gen_rtx_MEM (DECL_MODE (obj), x);
3359       set_mem_addr_space (x, as);
3360     }
3361
3362   return x;
3363 }
3364
3365 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3366    walk_tree.  DATA contains the actual fake register number.  */
3367
3368 static tree
3369 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3370 {
3371   tree obj = NULL_TREE;
3372   rtx x = NULL_RTX;
3373   int *regno = (int *) data;
3374
3375   switch (TREE_CODE (*expr_p))
3376     {
3377     case ADDR_EXPR:
3378       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3379            handled_component_p (*expr_p);
3380            expr_p = &TREE_OPERAND (*expr_p, 0))
3381         continue;
3382       obj = *expr_p;
3383       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3384         x = produce_memory_decl_rtl (obj, regno);
3385       break;
3386
3387     case SSA_NAME:
3388       *ws = 0;
3389       obj = SSA_NAME_VAR (*expr_p);
3390       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3391       if (!obj)
3392         return NULL_TREE;
3393       if (!DECL_RTL_SET_P (obj))
3394         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3395       break;
3396
3397     case VAR_DECL:
3398     case PARM_DECL:
3399     case RESULT_DECL:
3400       *ws = 0;
3401       obj = *expr_p;
3402
3403       if (DECL_RTL_SET_P (obj))
3404         break;
3405
3406       if (DECL_MODE (obj) == BLKmode)
3407         x = produce_memory_decl_rtl (obj, regno);
3408       else
3409         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3410
3411       break;
3412
3413     default:
3414       break;
3415     }
3416
3417   if (x)
3418     {
3419       decl_rtl_to_reset.safe_push (obj);
3420       SET_DECL_RTL (obj, x);
3421     }
3422
3423   return NULL_TREE;
3424 }
3425
3426 /* Determines cost of the computation of EXPR.  */
3427
3428 static unsigned
3429 computation_cost (tree expr, bool speed)
3430 {
3431   rtx_insn *seq;
3432   rtx rslt;
3433   tree type = TREE_TYPE (expr);
3434   unsigned cost;
3435   /* Avoid using hard regs in ways which may be unsupported.  */
3436   int regno = LAST_VIRTUAL_REGISTER + 1;
3437   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3438   enum node_frequency real_frequency = node->frequency;
3439
3440   node->frequency = NODE_FREQUENCY_NORMAL;
3441   crtl->maybe_hot_insn_p = speed;
3442   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3443   start_sequence ();
3444   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3445   seq = get_insns ();
3446   end_sequence ();
3447   default_rtl_profile ();
3448   node->frequency = real_frequency;
3449
3450   cost = seq_cost (seq, speed);
3451   if (MEM_P (rslt))
3452     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3453                           TYPE_ADDR_SPACE (type), speed);
3454   else if (!REG_P (rslt))
3455     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3456
3457   return cost;
3458 }
3459
3460 /* Returns variable containing the value of candidate CAND at statement AT.  */
3461
3462 static tree
3463 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3464 {
3465   if (stmt_after_increment (loop, cand, stmt))
3466     return cand->var_after;
3467   else
3468     return cand->var_before;
3469 }
3470
3471 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3472    same precision that is at least as wide as the precision of TYPE, stores
3473    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3474    type of A and B.  */
3475
3476 static tree
3477 determine_common_wider_type (tree *a, tree *b)
3478 {
3479   tree wider_type = NULL;
3480   tree suba, subb;
3481   tree atype = TREE_TYPE (*a);
3482
3483   if (CONVERT_EXPR_P (*a))
3484     {
3485       suba = TREE_OPERAND (*a, 0);
3486       wider_type = TREE_TYPE (suba);
3487       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3488         return atype;
3489     }
3490   else
3491     return atype;
3492
3493   if (CONVERT_EXPR_P (*b))
3494     {
3495       subb = TREE_OPERAND (*b, 0);
3496       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3497         return atype;
3498     }
3499   else
3500     return atype;
3501
3502   *a = suba;
3503   *b = subb;
3504   return wider_type;
3505 }
3506
3507 /* Determines the expression by that USE is expressed from induction variable
3508    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3509    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3510
3511 static bool
3512 get_computation_aff (struct loop *loop,
3513                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3514                      struct aff_tree *aff)
3515 {
3516   tree ubase = use->iv->base;
3517   tree ustep = use->iv->step;
3518   tree cbase = cand->iv->base;
3519   tree cstep = cand->iv->step, cstep_common;
3520   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3521   tree common_type, var;
3522   tree uutype;
3523   aff_tree cbase_aff, var_aff;
3524   widest_int rat;
3525
3526   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3527     {
3528       /* We do not have a precision to express the values of use.  */
3529       return false;
3530     }
3531
3532   var = var_at_stmt (loop, cand, at);
3533   uutype = unsigned_type_for (utype);
3534
3535   /* If the conversion is not noop, perform it.  */
3536   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3537     {
3538       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3539           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3540         {
3541           tree inner_base, inner_step, inner_type;
3542           inner_base = TREE_OPERAND (cbase, 0);
3543           if (CONVERT_EXPR_P (cstep))
3544             inner_step = TREE_OPERAND (cstep, 0);
3545           else
3546             inner_step = cstep;
3547
3548           inner_type = TREE_TYPE (inner_base);
3549           /* If candidate is added from a biv whose type is smaller than
3550              ctype, we know both candidate and the biv won't overflow.
3551              In this case, it's safe to skip the convertion in candidate.
3552              As an example, (unsigned short)((unsigned long)A) equals to
3553              (unsigned short)A, if A has a type no larger than short.  */
3554           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3555             {
3556               cbase = inner_base;
3557               cstep = inner_step;
3558             }
3559         }
3560       cstep = fold_convert (uutype, cstep);
3561       cbase = fold_convert (uutype, cbase);
3562       var = fold_convert (uutype, var);
3563     }
3564
3565   if (!constant_multiple_of (ustep, cstep, &rat))
3566     return false;
3567
3568   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3569      type, we achieve better folding by computing their difference in this
3570      wider type, and cast the result to UUTYPE.  We do not need to worry about
3571      overflows, as all the arithmetics will in the end be performed in UUTYPE
3572      anyway.  */
3573   common_type = determine_common_wider_type (&ubase, &cbase);
3574
3575   /* use = ubase - ratio * cbase + ratio * var.  */
3576   tree_to_aff_combination (ubase, common_type, aff);
3577   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3578   tree_to_aff_combination (var, uutype, &var_aff);
3579
3580   /* We need to shift the value if we are after the increment.  */
3581   if (stmt_after_increment (loop, cand, at))
3582     {
3583       aff_tree cstep_aff;
3584
3585       if (common_type != uutype)
3586         cstep_common = fold_convert (common_type, cstep);
3587       else
3588         cstep_common = cstep;
3589
3590       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3591       aff_combination_add (&cbase_aff, &cstep_aff);
3592     }
3593
3594   aff_combination_scale (&cbase_aff, -rat);
3595   aff_combination_add (aff, &cbase_aff);
3596   if (common_type != uutype)
3597     aff_combination_convert (aff, uutype);
3598
3599   aff_combination_scale (&var_aff, rat);
3600   aff_combination_add (aff, &var_aff);
3601
3602   return true;
3603 }
3604
3605 /* Return the type of USE.  */
3606
3607 static tree
3608 get_use_type (struct iv_use *use)
3609 {
3610   tree base_type = TREE_TYPE (use->iv->base);
3611   tree type;
3612
3613   if (use->type == USE_ADDRESS)
3614     {
3615       /* The base_type may be a void pointer.  Create a pointer type based on
3616          the mem_ref instead.  */
3617       type = build_pointer_type (TREE_TYPE (*use->op_p));
3618       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3619                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3620     }
3621   else
3622     type = base_type;
3623
3624   return type;
3625 }
3626
3627 /* Determines the expression by that USE is expressed from induction variable
3628    CAND at statement AT in LOOP.  The computation is unshared.  */
3629
3630 static tree
3631 get_computation_at (struct loop *loop,
3632                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3633 {
3634   aff_tree aff;
3635   tree type = get_use_type (use);
3636
3637   if (!get_computation_aff (loop, use, cand, at, &aff))
3638     return NULL_TREE;
3639   unshare_aff_combination (&aff);
3640   return fold_convert (type, aff_combination_to_tree (&aff));
3641 }
3642
3643 /* Determines the expression by that USE is expressed from induction variable
3644    CAND in LOOP.  The computation is unshared.  */
3645
3646 static tree
3647 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3648 {
3649   return get_computation_at (loop, use, cand, use->stmt);
3650 }
3651
3652 /* Adjust the cost COST for being in loop setup rather than loop body.
3653    If we're optimizing for space, the loop setup overhead is constant;
3654    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3655 static unsigned
3656 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3657 {
3658   if (cost == INFTY)
3659     return cost;
3660   else if (optimize_loop_for_speed_p (data->current_loop))
3661     return cost / avg_loop_niter (data->current_loop);
3662   else
3663     return cost;
3664 }
3665
3666 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3667    validity for a memory reference accessing memory of mode MODE in
3668    address space AS.  */
3669
3670
3671 bool
3672 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3673                                  addr_space_t as)
3674 {
3675 #define MAX_RATIO 128
3676   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3677   static vec<sbitmap> valid_mult_list;
3678   sbitmap valid_mult;
3679
3680   if (data_index >= valid_mult_list.length ())
3681     valid_mult_list.safe_grow_cleared (data_index + 1);
3682
3683   valid_mult = valid_mult_list[data_index];
3684   if (!valid_mult)
3685     {
3686       machine_mode address_mode = targetm.addr_space.address_mode (as);
3687       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3688       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3689       rtx addr, scaled;
3690       HOST_WIDE_INT i;
3691
3692       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3693       bitmap_clear (valid_mult);
3694       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3695       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3696       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3697         {
3698           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3699           if (memory_address_addr_space_p (mode, addr, as)
3700               || memory_address_addr_space_p (mode, scaled, as))
3701             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3702         }
3703
3704       if (dump_file && (dump_flags & TDF_DETAILS))
3705         {
3706           fprintf (dump_file, "  allowed multipliers:");
3707           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3708             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3709               fprintf (dump_file, " %d", (int) i);
3710           fprintf (dump_file, "\n");
3711           fprintf (dump_file, "\n");
3712         }
3713
3714       valid_mult_list[data_index] = valid_mult;
3715     }
3716
3717   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3718     return false;
3719
3720   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3721 }
3722
3723 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3724    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3725    variable is omitted.  Compute the cost for a memory reference that accesses
3726    a memory location of mode MEM_MODE in address space AS.
3727
3728    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3729    size of MEM_MODE / RATIO) is available.  To make this determination, we
3730    look at the size of the increment to be made, which is given in CSTEP.
3731    CSTEP may be zero if the step is unknown.
3732    STMT_AFTER_INC is true iff the statement we're looking at is after the
3733    increment of the original biv.
3734
3735    TODO -- there must be some better way.  This all is quite crude.  */
3736
3737 enum ainc_type
3738 {
3739   AINC_PRE_INC,         /* Pre increment.  */
3740   AINC_PRE_DEC,         /* Pre decrement.  */
3741   AINC_POST_INC,        /* Post increment.  */
3742   AINC_POST_DEC,        /* Post decrement.  */
3743   AINC_NONE             /* Also the number of auto increment types.  */
3744 };
3745
3746 struct address_cost_data
3747 {
3748   HOST_WIDE_INT min_offset, max_offset;
3749   unsigned costs[2][2][2][2];
3750   unsigned ainc_costs[AINC_NONE];
3751 };
3752
3753
3754 static comp_cost
3755 get_address_cost (bool symbol_present, bool var_present,
3756                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3757                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3758                   addr_space_t as, bool speed,
3759                   bool stmt_after_inc, bool *may_autoinc)
3760 {
3761   machine_mode address_mode = targetm.addr_space.address_mode (as);
3762   static vec<address_cost_data *> address_cost_data_list;
3763   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3764   address_cost_data *data;
3765   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3766   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3767   unsigned cost, acost, complexity;
3768   enum ainc_type autoinc_type;
3769   bool offset_p, ratio_p, autoinc;
3770   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3771   unsigned HOST_WIDE_INT mask;
3772   unsigned bits;
3773
3774   if (data_index >= address_cost_data_list.length ())
3775     address_cost_data_list.safe_grow_cleared (data_index + 1);
3776
3777   data = address_cost_data_list[data_index];
3778   if (!data)
3779     {
3780       HOST_WIDE_INT i;
3781       HOST_WIDE_INT rat, off = 0;
3782       int old_cse_not_expected, width;
3783       unsigned sym_p, var_p, off_p, rat_p, add_c;
3784       rtx_insn *seq;
3785       rtx addr, base;
3786       rtx reg0, reg1;
3787
3788       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3789
3790       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3791
3792       width = GET_MODE_BITSIZE (address_mode) - 1;
3793       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3794         width = HOST_BITS_PER_WIDE_INT - 1;
3795       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3796
3797       for (i = width; i >= 0; i--)
3798         {
3799           off = -((unsigned HOST_WIDE_INT) 1 << i);
3800           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3801           if (memory_address_addr_space_p (mem_mode, addr, as))
3802             break;
3803         }
3804       data->min_offset = (i == -1? 0 : off);
3805
3806       for (i = width; i >= 0; i--)
3807         {
3808           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3809           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3810           if (memory_address_addr_space_p (mem_mode, addr, as))
3811             break;
3812           /* For some strict-alignment targets, the offset must be naturally
3813              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3814           off = mem_mode != QImode
3815                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3816                     - GET_MODE_SIZE (mem_mode)
3817                 : 0;
3818           if (off > 0)
3819             {
3820               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3821               if (memory_address_addr_space_p (mem_mode, addr, as))
3822                 break;
3823             }
3824         }
3825       if (i == -1)
3826         off = 0;
3827       data->max_offset = off;
3828
3829       if (dump_file && (dump_flags & TDF_DETAILS))
3830         {
3831           fprintf (dump_file, "get_address_cost:\n");
3832           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3833                    GET_MODE_NAME (mem_mode),
3834                    data->min_offset);
3835           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3836                    GET_MODE_NAME (mem_mode),
3837                    data->max_offset);
3838         }
3839
3840       rat = 1;
3841       for (i = 2; i <= MAX_RATIO; i++)
3842         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3843           {
3844             rat = i;
3845             break;
3846           }
3847
3848       /* Compute the cost of various addressing modes.  */
3849       acost = 0;
3850       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3851       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3852
3853       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3854           || USE_STORE_PRE_DECREMENT (mem_mode))
3855         {
3856           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3857           has_predec[mem_mode]
3858             = memory_address_addr_space_p (mem_mode, addr, as);
3859
3860           if (has_predec[mem_mode])
3861             data->ainc_costs[AINC_PRE_DEC]
3862               = address_cost (addr, mem_mode, as, speed);
3863         }
3864       if (USE_LOAD_POST_DECREMENT (mem_mode)
3865           || USE_STORE_POST_DECREMENT (mem_mode))
3866         {
3867           addr = gen_rtx_POST_DEC (address_mode, reg0);
3868           has_postdec[mem_mode]
3869             = memory_address_addr_space_p (mem_mode, addr, as);
3870
3871           if (has_postdec[mem_mode])
3872             data->ainc_costs[AINC_POST_DEC]
3873               = address_cost (addr, mem_mode, as, speed);
3874         }
3875       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3876           || USE_STORE_PRE_DECREMENT (mem_mode))
3877         {
3878           addr = gen_rtx_PRE_INC (address_mode, reg0);
3879           has_preinc[mem_mode]
3880             = memory_address_addr_space_p (mem_mode, addr, as);
3881
3882           if (has_preinc[mem_mode])
3883             data->ainc_costs[AINC_PRE_INC]
3884               = address_cost (addr, mem_mode, as, speed);
3885         }
3886       if (USE_LOAD_POST_INCREMENT (mem_mode)
3887           || USE_STORE_POST_INCREMENT (mem_mode))
3888         {
3889           addr = gen_rtx_POST_INC (address_mode, reg0);
3890           has_postinc[mem_mode]
3891             = memory_address_addr_space_p (mem_mode, addr, as);
3892
3893           if (has_postinc[mem_mode])
3894             data->ainc_costs[AINC_POST_INC]
3895               = address_cost (addr, mem_mode, as, speed);
3896         }
3897       for (i = 0; i < 16; i++)
3898         {
3899           sym_p = i & 1;
3900           var_p = (i >> 1) & 1;
3901           off_p = (i >> 2) & 1;
3902           rat_p = (i >> 3) & 1;
3903
3904           addr = reg0;
3905           if (rat_p)
3906             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3907                                    gen_int_mode (rat, address_mode));
3908
3909           if (var_p)
3910             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3911
3912           if (sym_p)
3913             {
3914               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3915               /* ??? We can run into trouble with some backends by presenting
3916                  it with symbols which haven't been properly passed through
3917                  targetm.encode_section_info.  By setting the local bit, we
3918                  enhance the probability of things working.  */
3919               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3920
3921               if (off_p)
3922                 base = gen_rtx_fmt_e (CONST, address_mode,
3923                                       gen_rtx_fmt_ee
3924                                         (PLUS, address_mode, base,
3925                                          gen_int_mode (off, address_mode)));
3926             }
3927           else if (off_p)
3928             base = gen_int_mode (off, address_mode);
3929           else
3930             base = NULL_RTX;
3931
3932           if (base)
3933             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3934
3935           start_sequence ();
3936           /* To avoid splitting addressing modes, pretend that no cse will
3937              follow.  */
3938           old_cse_not_expected = cse_not_expected;
3939           cse_not_expected = true;
3940           addr = memory_address_addr_space (mem_mode, addr, as);
3941           cse_not_expected = old_cse_not_expected;
3942           seq = get_insns ();
3943           end_sequence ();
3944
3945           acost = seq_cost (seq, speed);
3946           acost += address_cost (addr, mem_mode, as, speed);
3947
3948           if (!acost)
3949             acost = 1;
3950           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3951         }
3952
3953       /* On some targets, it is quite expensive to load symbol to a register,
3954          which makes addresses that contain symbols look much more expensive.
3955          However, the symbol will have to be loaded in any case before the
3956          loop (and quite likely we have it in register already), so it does not
3957          make much sense to penalize them too heavily.  So make some final
3958          tweaks for the SYMBOL_PRESENT modes:
3959
3960          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3961          var is cheaper, use this mode with small penalty.
3962          If VAR_PRESENT is true, try whether the mode with
3963          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3964          if this is the case, use it.  */
3965       add_c = add_cost (speed, address_mode);
3966       for (i = 0; i < 8; i++)
3967         {
3968           var_p = i & 1;
3969           off_p = (i >> 1) & 1;
3970           rat_p = (i >> 2) & 1;
3971
3972           acost = data->costs[0][1][off_p][rat_p] + 1;
3973           if (var_p)
3974             acost += add_c;
3975
3976           if (acost < data->costs[1][var_p][off_p][rat_p])
3977             data->costs[1][var_p][off_p][rat_p] = acost;
3978         }
3979
3980       if (dump_file && (dump_flags & TDF_DETAILS))
3981         {
3982           fprintf (dump_file, "Address costs:\n");
3983
3984           for (i = 0; i < 16; i++)
3985             {
3986               sym_p = i & 1;
3987               var_p = (i >> 1) & 1;
3988               off_p = (i >> 2) & 1;
3989               rat_p = (i >> 3) & 1;
3990
3991               fprintf (dump_file, "  ");
3992               if (sym_p)
3993                 fprintf (dump_file, "sym + ");
3994               if (var_p)
3995                 fprintf (dump_file, "var + ");
3996               if (off_p)
3997                 fprintf (dump_file, "cst + ");
3998               if (rat_p)
3999                 fprintf (dump_file, "rat * ");
4000
4001               acost = data->costs[sym_p][var_p][off_p][rat_p];
4002               fprintf (dump_file, "index costs %d\n", acost);
4003             }
4004           if (has_predec[mem_mode] || has_postdec[mem_mode]
4005               || has_preinc[mem_mode] || has_postinc[mem_mode])
4006             fprintf (dump_file, "  May include autoinc/dec\n");
4007           fprintf (dump_file, "\n");
4008         }
4009
4010       address_cost_data_list[data_index] = data;
4011     }
4012
4013   bits = GET_MODE_BITSIZE (address_mode);
4014   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4015   offset &= mask;
4016   if ((offset >> (bits - 1) & 1))
4017     offset |= ~mask;
4018   s_offset = offset;
4019
4020   autoinc = false;
4021   autoinc_type = AINC_NONE;
4022   msize = GET_MODE_SIZE (mem_mode);
4023   autoinc_offset = offset;
4024   if (stmt_after_inc)
4025     autoinc_offset += ratio * cstep;
4026   if (symbol_present || var_present || ratio != 1)
4027     autoinc = false;
4028   else
4029     {
4030       if (has_postinc[mem_mode] && autoinc_offset == 0
4031           && msize == cstep)
4032         autoinc_type = AINC_POST_INC;
4033       else if (has_postdec[mem_mode] && autoinc_offset == 0
4034                && msize == -cstep)
4035         autoinc_type = AINC_POST_DEC;
4036       else if (has_preinc[mem_mode] && autoinc_offset == msize
4037                && msize == cstep)
4038         autoinc_type = AINC_PRE_INC;
4039       else if (has_predec[mem_mode] && autoinc_offset == -msize
4040                && msize == -cstep)
4041         autoinc_type = AINC_PRE_DEC;
4042
4043       if (autoinc_type != AINC_NONE)
4044         autoinc = true;
4045     }
4046
4047   cost = 0;
4048   offset_p = (s_offset != 0
4049               && data->min_offset <= s_offset
4050               && s_offset <= data->max_offset);
4051   ratio_p = (ratio != 1
4052              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4053
4054   if (ratio != 1 && !ratio_p)
4055     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4056
4057   if (s_offset && !offset_p && !symbol_present)
4058     cost += add_cost (speed, address_mode);
4059
4060   if (may_autoinc)
4061     *may_autoinc = autoinc;
4062   if (autoinc)
4063     acost = data->ainc_costs[autoinc_type];
4064   else
4065     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4066   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4067   return new_cost (cost + acost, complexity);
4068 }
4069
4070  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4071     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
4072     calculating the operands of EXPR.  Returns true if successful, and returns
4073     the cost in COST.  */
4074
4075 static bool
4076 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4077                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4078 {
4079   comp_cost res;
4080   tree op1 = TREE_OPERAND (expr, 1);
4081   tree cst = TREE_OPERAND (mult, 1);
4082   tree multop = TREE_OPERAND (mult, 0);
4083   int m = exact_log2 (int_cst_value (cst));
4084   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4085   int as_cost, sa_cost;
4086   bool mult_in_op1;
4087
4088   if (!(m >= 0 && m < maxm))
4089     return false;
4090
4091   STRIP_NOPS (op1);
4092   mult_in_op1 = operand_equal_p (op1, mult, 0);
4093
4094   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4095
4096   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4097      use that in preference to a shift insn followed by an add insn.  */
4098   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4099              ? shiftadd_cost (speed, mode, m)
4100              : (mult_in_op1
4101                 ? shiftsub1_cost (speed, mode, m)
4102                 : shiftsub0_cost (speed, mode, m)));
4103
4104   res = new_cost (MIN (as_cost, sa_cost), 0);
4105   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4106
4107   STRIP_NOPS (multop);
4108   if (!is_gimple_val (multop))
4109     res = add_costs (res, force_expr_to_var_cost (multop, speed));
4110
4111   *cost = res;
4112   return true;
4113 }
4114
4115 /* Estimates cost of forcing expression EXPR into a variable.  */
4116
4117 static comp_cost
4118 force_expr_to_var_cost (tree expr, bool speed)
4119 {
4120   static bool costs_initialized = false;
4121   static unsigned integer_cost [2];
4122   static unsigned symbol_cost [2];
4123   static unsigned address_cost [2];
4124   tree op0, op1;
4125   comp_cost cost0, cost1, cost;
4126   machine_mode mode;
4127
4128   if (!costs_initialized)
4129     {
4130       tree type = build_pointer_type (integer_type_node);
4131       tree var, addr;
4132       rtx x;
4133       int i;
4134
4135       var = create_tmp_var_raw (integer_type_node, "test_var");
4136       TREE_STATIC (var) = 1;
4137       x = produce_memory_decl_rtl (var, NULL);
4138       SET_DECL_RTL (var, x);
4139
4140       addr = build1 (ADDR_EXPR, type, var);
4141
4142
4143       for (i = 0; i < 2; i++)
4144         {
4145           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4146                                                              2000), i);
4147
4148           symbol_cost[i] = computation_cost (addr, i) + 1;
4149
4150           address_cost[i]
4151             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4152           if (dump_file && (dump_flags & TDF_DETAILS))
4153             {
4154               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4155               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4156               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4157               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4158               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4159               fprintf (dump_file, "\n");
4160             }
4161         }
4162
4163       costs_initialized = true;
4164     }
4165
4166   STRIP_NOPS (expr);
4167
4168   if (SSA_VAR_P (expr))
4169     return no_cost;
4170
4171   if (is_gimple_min_invariant (expr))
4172     {
4173       if (TREE_CODE (expr) == INTEGER_CST)
4174         return new_cost (integer_cost [speed], 0);
4175
4176       if (TREE_CODE (expr) == ADDR_EXPR)
4177         {
4178           tree obj = TREE_OPERAND (expr, 0);
4179
4180           if (TREE_CODE (obj) == VAR_DECL
4181               || TREE_CODE (obj) == PARM_DECL
4182               || TREE_CODE (obj) == RESULT_DECL)
4183             return new_cost (symbol_cost [speed], 0);
4184         }
4185
4186       return new_cost (address_cost [speed], 0);
4187     }
4188
4189   switch (TREE_CODE (expr))
4190     {
4191     case POINTER_PLUS_EXPR:
4192     case PLUS_EXPR:
4193     case MINUS_EXPR:
4194     case MULT_EXPR:
4195       op0 = TREE_OPERAND (expr, 0);
4196       op1 = TREE_OPERAND (expr, 1);
4197       STRIP_NOPS (op0);
4198       STRIP_NOPS (op1);
4199       break;
4200
4201     CASE_CONVERT:
4202     case NEGATE_EXPR:
4203       op0 = TREE_OPERAND (expr, 0);
4204       STRIP_NOPS (op0);
4205       op1 = NULL_TREE;
4206       break;
4207
4208     default:
4209       /* Just an arbitrary value, FIXME.  */
4210       return new_cost (target_spill_cost[speed], 0);
4211     }
4212
4213   if (op0 == NULL_TREE
4214       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4215     cost0 = no_cost;
4216   else
4217     cost0 = force_expr_to_var_cost (op0, speed);
4218
4219   if (op1 == NULL_TREE
4220       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4221     cost1 = no_cost;
4222   else
4223     cost1 = force_expr_to_var_cost (op1, speed);
4224
4225   mode = TYPE_MODE (TREE_TYPE (expr));
4226   switch (TREE_CODE (expr))
4227     {
4228     case POINTER_PLUS_EXPR:
4229     case PLUS_EXPR:
4230     case MINUS_EXPR:
4231     case NEGATE_EXPR:
4232       cost = new_cost (add_cost (speed, mode), 0);
4233       if (TREE_CODE (expr) != NEGATE_EXPR)
4234         {
4235           tree mult = NULL_TREE;
4236           comp_cost sa_cost;
4237           if (TREE_CODE (op1) == MULT_EXPR)
4238             mult = op1;
4239           else if (TREE_CODE (op0) == MULT_EXPR)
4240             mult = op0;
4241
4242           if (mult != NULL_TREE
4243               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4244               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4245                                     speed, &sa_cost))
4246             return sa_cost;
4247         }
4248       break;
4249
4250     CASE_CONVERT:
4251       {
4252         tree inner_mode, outer_mode;
4253         outer_mode = TREE_TYPE (expr);
4254         inner_mode = TREE_TYPE (op0);
4255         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4256                                        TYPE_MODE (inner_mode), speed), 0);
4257       }
4258       break;
4259
4260     case MULT_EXPR:
4261       if (cst_and_fits_in_hwi (op0))
4262         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4263                                              mode, speed), 0);
4264       else if (cst_and_fits_in_hwi (op1))
4265         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4266                                              mode, speed), 0);
4267       else
4268         return new_cost (target_spill_cost [speed], 0);
4269       break;
4270
4271     default:
4272       gcc_unreachable ();
4273     }
4274
4275   cost = add_costs (cost, cost0);
4276   cost = add_costs (cost, cost1);
4277
4278   /* Bound the cost by target_spill_cost.  The parts of complicated
4279      computations often are either loop invariant or at least can
4280      be shared between several iv uses, so letting this grow without
4281      limits would not give reasonable results.  */
4282   if (cost.cost > (int) target_spill_cost [speed])
4283     cost.cost = target_spill_cost [speed];
4284
4285   return cost;
4286 }
4287
4288 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4289    invariants the computation depends on.  */
4290
4291 static comp_cost
4292 force_var_cost (struct ivopts_data *data,
4293                 tree expr, bitmap *depends_on)
4294 {
4295   if (depends_on)
4296     {
4297       fd_ivopts_data = data;
4298       walk_tree (&expr, find_depends, depends_on, NULL);
4299     }
4300
4301   return force_expr_to_var_cost (expr, data->speed);
4302 }
4303
4304 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4305    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4306    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4307    invariants the computation depends on.  */
4308
4309 static comp_cost
4310 split_address_cost (struct ivopts_data *data,
4311                     tree addr, bool *symbol_present, bool *var_present,
4312                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4313 {
4314   tree core;
4315   HOST_WIDE_INT bitsize;
4316   HOST_WIDE_INT bitpos;
4317   tree toffset;
4318   machine_mode mode;
4319   int unsignedp, volatilep;
4320
4321   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4322                               &unsignedp, &volatilep, false);
4323
4324   if (toffset != 0
4325       || bitpos % BITS_PER_UNIT != 0
4326       || TREE_CODE (core) != VAR_DECL)
4327     {
4328       *symbol_present = false;
4329       *var_present = true;
4330       fd_ivopts_data = data;
4331       walk_tree (&addr, find_depends, depends_on, NULL);
4332       return new_cost (target_spill_cost[data->speed], 0);
4333     }
4334
4335   *offset += bitpos / BITS_PER_UNIT;
4336   if (TREE_STATIC (core)
4337       || DECL_EXTERNAL (core))
4338     {
4339       *symbol_present = true;
4340       *var_present = false;
4341       return no_cost;
4342     }
4343
4344   *symbol_present = false;
4345   *var_present = true;
4346   return no_cost;
4347 }
4348
4349 /* Estimates cost of expressing difference of addresses E1 - E2 as
4350    var + symbol + offset.  The value of offset is added to OFFSET,
4351    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4352    part is missing.  DEPENDS_ON is a set of the invariants the computation
4353    depends on.  */
4354
4355 static comp_cost
4356 ptr_difference_cost (struct ivopts_data *data,
4357                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4358                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4359 {
4360   HOST_WIDE_INT diff = 0;
4361   aff_tree aff_e1, aff_e2;
4362   tree type;
4363
4364   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4365
4366   if (ptr_difference_const (e1, e2, &diff))
4367     {
4368       *offset += diff;
4369       *symbol_present = false;
4370       *var_present = false;
4371       return no_cost;
4372     }
4373
4374   if (integer_zerop (e2))
4375     return split_address_cost (data, TREE_OPERAND (e1, 0),
4376                                symbol_present, var_present, offset, depends_on);
4377
4378   *symbol_present = false;
4379   *var_present = true;
4380
4381   type = signed_type_for (TREE_TYPE (e1));
4382   tree_to_aff_combination (e1, type, &aff_e1);
4383   tree_to_aff_combination (e2, type, &aff_e2);
4384   aff_combination_scale (&aff_e2, -1);
4385   aff_combination_add (&aff_e1, &aff_e2);
4386
4387   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4388 }
4389
4390 /* Estimates cost of expressing difference E1 - E2 as
4391    var + symbol + offset.  The value of offset is added to OFFSET,
4392    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4393    part is missing.  DEPENDS_ON is a set of the invariants the computation
4394    depends on.  */
4395
4396 static comp_cost
4397 difference_cost (struct ivopts_data *data,
4398                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4399                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4400 {
4401   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4402   unsigned HOST_WIDE_INT off1, off2;
4403   aff_tree aff_e1, aff_e2;
4404   tree type;
4405
4406   e1 = strip_offset (e1, &off1);
4407   e2 = strip_offset (e2, &off2);
4408   *offset += off1 - off2;
4409
4410   STRIP_NOPS (e1);
4411   STRIP_NOPS (e2);
4412
4413   if (TREE_CODE (e1) == ADDR_EXPR)
4414     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4415                                 offset, depends_on);
4416   *symbol_present = false;
4417
4418   if (operand_equal_p (e1, e2, 0))
4419     {
4420       *var_present = false;
4421       return no_cost;
4422     }
4423
4424   *var_present = true;
4425
4426   if (integer_zerop (e2))
4427     return force_var_cost (data, e1, depends_on);
4428
4429   if (integer_zerop (e1))
4430     {
4431       comp_cost cost = force_var_cost (data, e2, depends_on);
4432       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4433       return cost;
4434     }
4435
4436   type = signed_type_for (TREE_TYPE (e1));
4437   tree_to_aff_combination (e1, type, &aff_e1);
4438   tree_to_aff_combination (e2, type, &aff_e2);
4439   aff_combination_scale (&aff_e2, -1);
4440   aff_combination_add (&aff_e1, &aff_e2);
4441
4442   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4443 }
4444
4445 /* Returns true if AFF1 and AFF2 are identical.  */
4446
4447 static bool
4448 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4449 {
4450   unsigned i;
4451
4452   if (aff1->n != aff2->n)
4453     return false;
4454
4455   for (i = 0; i < aff1->n; i++)
4456     {
4457       if (aff1->elts[i].coef != aff2->elts[i].coef)
4458         return false;
4459
4460       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4461         return false;
4462     }
4463   return true;
4464 }
4465
4466 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4467
4468 static int
4469 get_expr_id (struct ivopts_data *data, tree expr)
4470 {
4471   struct iv_inv_expr_ent ent;
4472   struct iv_inv_expr_ent **slot;
4473
4474   ent.expr = expr;
4475   ent.hash = iterative_hash_expr (expr, 0);
4476   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4477   if (*slot)
4478     return (*slot)->id;
4479
4480   *slot = XNEW (struct iv_inv_expr_ent);
4481   (*slot)->expr = expr;
4482   (*slot)->hash = ent.hash;
4483   (*slot)->id = data->inv_expr_id++;
4484   return (*slot)->id;
4485 }
4486
4487 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4488    requires a new compiler generated temporary.  Returns -1 otherwise.
4489    ADDRESS_P is a flag indicating if the expression is for address
4490    computation.  */
4491
4492 static int
4493 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4494                             tree cbase, HOST_WIDE_INT ratio,
4495                             bool address_p)
4496 {
4497   aff_tree ubase_aff, cbase_aff;
4498   tree expr, ub, cb;
4499
4500   STRIP_NOPS (ubase);
4501   STRIP_NOPS (cbase);
4502   ub = ubase;
4503   cb = cbase;
4504
4505   if ((TREE_CODE (ubase) == INTEGER_CST)
4506       && (TREE_CODE (cbase) == INTEGER_CST))
4507     return -1;
4508
4509   /* Strips the constant part. */
4510   if (TREE_CODE (ubase) == PLUS_EXPR
4511       || TREE_CODE (ubase) == MINUS_EXPR
4512       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4513     {
4514       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4515         ubase = TREE_OPERAND (ubase, 0);
4516     }
4517
4518   /* Strips the constant part. */
4519   if (TREE_CODE (cbase) == PLUS_EXPR
4520       || TREE_CODE (cbase) == MINUS_EXPR
4521       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4522     {
4523       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4524         cbase = TREE_OPERAND (cbase, 0);
4525     }
4526
4527   if (address_p)
4528     {
4529       if (((TREE_CODE (ubase) == SSA_NAME)
4530            || (TREE_CODE (ubase) == ADDR_EXPR
4531                && is_gimple_min_invariant (ubase)))
4532           && (TREE_CODE (cbase) == INTEGER_CST))
4533         return -1;
4534
4535       if (((TREE_CODE (cbase) == SSA_NAME)
4536            || (TREE_CODE (cbase) == ADDR_EXPR
4537                && is_gimple_min_invariant (cbase)))
4538           && (TREE_CODE (ubase) == INTEGER_CST))
4539         return -1;
4540     }
4541
4542   if (ratio == 1)
4543     {
4544       if (operand_equal_p (ubase, cbase, 0))
4545         return -1;
4546
4547       if (TREE_CODE (ubase) == ADDR_EXPR
4548           && TREE_CODE (cbase) == ADDR_EXPR)
4549         {
4550           tree usym, csym;
4551
4552           usym = TREE_OPERAND (ubase, 0);
4553           csym = TREE_OPERAND (cbase, 0);
4554           if (TREE_CODE (usym) == ARRAY_REF)
4555             {
4556               tree ind = TREE_OPERAND (usym, 1);
4557               if (TREE_CODE (ind) == INTEGER_CST
4558                   && tree_fits_shwi_p (ind)
4559                   && tree_to_shwi (ind) == 0)
4560                 usym = TREE_OPERAND (usym, 0);
4561             }
4562           if (TREE_CODE (csym) == ARRAY_REF)
4563             {
4564               tree ind = TREE_OPERAND (csym, 1);
4565               if (TREE_CODE (ind) == INTEGER_CST
4566                   && tree_fits_shwi_p (ind)
4567                   && tree_to_shwi (ind) == 0)
4568                 csym = TREE_OPERAND (csym, 0);
4569             }
4570           if (operand_equal_p (usym, csym, 0))
4571             return -1;
4572         }
4573       /* Now do more complex comparison  */
4574       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4575       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4576       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4577         return -1;
4578     }
4579
4580   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4581   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4582
4583   aff_combination_scale (&cbase_aff, -1 * ratio);
4584   aff_combination_add (&ubase_aff, &cbase_aff);
4585   expr = aff_combination_to_tree (&ubase_aff);
4586   return get_expr_id (data, expr);
4587 }
4588
4589
4590
4591 /* Determines the cost of the computation by that USE is expressed
4592    from induction variable CAND.  If ADDRESS_P is true, we just need
4593    to create an address from it, otherwise we want to get it into
4594    register.  A set of invariants we depend on is stored in
4595    DEPENDS_ON.  AT is the statement at that the value is computed.
4596    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4597    addressing is likely.  */
4598
4599 static comp_cost
4600 get_computation_cost_at (struct ivopts_data *data,
4601                          struct iv_use *use, struct iv_cand *cand,
4602                          bool address_p, bitmap *depends_on, gimple *at,
4603                          bool *can_autoinc,
4604                          int *inv_expr_id)
4605 {
4606   tree ubase = use->iv->base, ustep = use->iv->step;
4607   tree cbase, cstep;
4608   tree utype = TREE_TYPE (ubase), ctype;
4609   unsigned HOST_WIDE_INT cstepi, offset = 0;
4610   HOST_WIDE_INT ratio, aratio;
4611   bool var_present, symbol_present, stmt_is_after_inc;
4612   comp_cost cost;
4613   widest_int rat;
4614   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4615   machine_mode mem_mode = (address_p
4616                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4617                                 : VOIDmode);
4618
4619   *depends_on = NULL;
4620
4621   /* Only consider real candidates.  */
4622   if (!cand->iv)
4623     return infinite_cost;
4624
4625   cbase = cand->iv->base;
4626   cstep = cand->iv->step;
4627   ctype = TREE_TYPE (cbase);
4628
4629   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4630     {
4631       /* We do not have a precision to express the values of use.  */
4632       return infinite_cost;
4633     }
4634
4635   if (address_p
4636       || (use->iv->base_object
4637           && cand->iv->base_object
4638           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4639           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4640     {
4641       /* Do not try to express address of an object with computation based
4642          on address of a different object.  This may cause problems in rtl
4643          level alias analysis (that does not expect this to be happening,
4644          as this is illegal in C), and would be unlikely to be useful
4645          anyway.  */
4646       if (use->iv->base_object
4647           && cand->iv->base_object
4648           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4649         return infinite_cost;
4650     }
4651
4652   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4653     {
4654       /* TODO -- add direct handling of this case.  */
4655       goto fallback;
4656     }
4657
4658   /* CSTEPI is removed from the offset in case statement is after the
4659      increment.  If the step is not constant, we use zero instead.
4660      This is a bit imprecise (there is the extra addition), but
4661      redundancy elimination is likely to transform the code so that
4662      it uses value of the variable before increment anyway,
4663      so it is not that much unrealistic.  */
4664   if (cst_and_fits_in_hwi (cstep))
4665     cstepi = int_cst_value (cstep);
4666   else
4667     cstepi = 0;
4668
4669   if (!constant_multiple_of (ustep, cstep, &rat))
4670     return infinite_cost;
4671
4672   if (wi::fits_shwi_p (rat))
4673     ratio = rat.to_shwi ();
4674   else
4675     return infinite_cost;
4676
4677   STRIP_NOPS (cbase);
4678   ctype = TREE_TYPE (cbase);
4679
4680   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4681
4682   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4683      or ratio == 1, it is better to handle this like
4684
4685      ubase - ratio * cbase + ratio * var
4686
4687      (also holds in the case ratio == -1, TODO.  */
4688
4689   if (cst_and_fits_in_hwi (cbase))
4690     {
4691       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4692       cost = difference_cost (data,
4693                               ubase, build_int_cst (utype, 0),
4694                               &symbol_present, &var_present, &offset,
4695                               depends_on);
4696       cost.cost /= avg_loop_niter (data->current_loop);
4697     }
4698   else if (ratio == 1)
4699     {
4700       tree real_cbase = cbase;
4701
4702       /* Check to see if any adjustment is needed.  */
4703       if (cstepi == 0 && stmt_is_after_inc)
4704         {
4705           aff_tree real_cbase_aff;
4706           aff_tree cstep_aff;
4707
4708           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4709                                    &real_cbase_aff);
4710           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4711
4712           aff_combination_add (&real_cbase_aff, &cstep_aff);
4713           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4714         }
4715
4716       cost = difference_cost (data,
4717                               ubase, real_cbase,
4718                               &symbol_present, &var_present, &offset,
4719                               depends_on);
4720       cost.cost /= avg_loop_niter (data->current_loop);
4721     }
4722   else if (address_p
4723            && !POINTER_TYPE_P (ctype)
4724            && multiplier_allowed_in_address_p
4725                 (ratio, mem_mode,
4726                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4727     {
4728       if (cstepi == 0 && stmt_is_after_inc)
4729         {
4730           if (POINTER_TYPE_P (ctype))
4731             cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4732           else
4733             cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4734         }
4735       cbase
4736         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4737       cost = difference_cost (data,
4738                               ubase, cbase,
4739                               &symbol_present, &var_present, &offset,
4740                               depends_on);
4741       cost.cost /= avg_loop_niter (data->current_loop);
4742     }
4743   else
4744     {
4745       cost = force_var_cost (data, cbase, depends_on);
4746       cost = add_costs (cost,
4747                         difference_cost (data,
4748                                          ubase, build_int_cst (utype, 0),
4749                                          &symbol_present, &var_present,
4750                                          &offset, depends_on));
4751       cost.cost /= avg_loop_niter (data->current_loop);
4752       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4753     }
4754
4755   /* Set of invariants depended on by sub use has already been computed
4756      for the first use in the group.  */
4757   if (use->sub_id)
4758     {
4759       cost.cost = 0;
4760       if (depends_on && *depends_on)
4761         bitmap_clear (*depends_on);
4762     }
4763   else if (inv_expr_id)
4764     {
4765       *inv_expr_id =
4766           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4767       /* Clear depends on.  */
4768       if (*inv_expr_id != -1 && depends_on && *depends_on)
4769         bitmap_clear (*depends_on);
4770     }
4771
4772   /* If we are after the increment, the value of the candidate is higher by
4773      one iteration.  */
4774   if (stmt_is_after_inc)
4775     offset -= ratio * cstepi;
4776
4777   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4778      (symbol/var1/const parts may be omitted).  If we are looking for an
4779      address, find the cost of addressing this.  */
4780   if (address_p)
4781     return add_costs (cost,
4782                       get_address_cost (symbol_present, var_present,
4783                                         offset, ratio, cstepi,
4784                                         mem_mode,
4785                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4786                                         speed, stmt_is_after_inc,
4787                                         can_autoinc));
4788
4789   /* Otherwise estimate the costs for computing the expression.  */
4790   if (!symbol_present && !var_present && !offset)
4791     {
4792       if (ratio != 1)
4793         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4794       return cost;
4795     }
4796
4797   /* Symbol + offset should be compile-time computable so consider that they
4798       are added once to the variable, if present.  */
4799   if (var_present && (symbol_present || offset))
4800     cost.cost += adjust_setup_cost (data,
4801                                     add_cost (speed, TYPE_MODE (ctype)));
4802
4803   /* Having offset does not affect runtime cost in case it is added to
4804      symbol, but it increases complexity.  */
4805   if (offset)
4806     cost.complexity++;
4807
4808   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4809
4810   aratio = ratio > 0 ? ratio : -ratio;
4811   if (aratio != 1)
4812     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4813   return cost;
4814
4815 fallback:
4816   if (can_autoinc)
4817     *can_autoinc = false;
4818
4819   {
4820     /* Just get the expression, expand it and measure the cost.  */
4821     tree comp = get_computation_at (data->current_loop, use, cand, at);
4822
4823     if (!comp)
4824       return infinite_cost;
4825
4826     if (address_p)
4827       comp = build_simple_mem_ref (comp);
4828
4829     return new_cost (computation_cost (comp, speed), 0);
4830   }
4831 }
4832
4833 /* Determines the cost of the computation by that USE is expressed
4834    from induction variable CAND.  If ADDRESS_P is true, we just need
4835    to create an address from it, otherwise we want to get it into
4836    register.  A set of invariants we depend on is stored in
4837    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4838    autoinc addressing is likely.  */
4839
4840 static comp_cost
4841 get_computation_cost (struct ivopts_data *data,
4842                       struct iv_use *use, struct iv_cand *cand,
4843                       bool address_p, bitmap *depends_on,
4844                       bool *can_autoinc, int *inv_expr_id)
4845 {
4846   return get_computation_cost_at (data,
4847                                   use, cand, address_p, depends_on, use->stmt,
4848                                   can_autoinc, inv_expr_id);
4849 }
4850
4851 /* Determines cost of basing replacement of USE on CAND in a generic
4852    expression.  */
4853
4854 static bool
4855 determine_use_iv_cost_generic (struct ivopts_data *data,
4856                                struct iv_use *use, struct iv_cand *cand)
4857 {
4858   bitmap depends_on;
4859   comp_cost cost;
4860   int inv_expr_id = -1;
4861
4862   /* The simple case first -- if we need to express value of the preserved
4863      original biv, the cost is 0.  This also prevents us from counting the
4864      cost of increment twice -- once at this use and once in the cost of
4865      the candidate.  */
4866   if (cand->pos == IP_ORIGINAL
4867       && cand->incremented_at == use->stmt)
4868     {
4869       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4870                        ERROR_MARK, -1);
4871       return true;
4872     }
4873
4874   cost = get_computation_cost (data, use, cand, false, &depends_on,
4875                                NULL, &inv_expr_id);
4876
4877   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4878                    inv_expr_id);
4879
4880   return !infinite_cost_p (cost);
4881 }
4882
4883 /* Determines cost of basing replacement of USE on CAND in an address.  */
4884
4885 static bool
4886 determine_use_iv_cost_address (struct ivopts_data *data,
4887                                struct iv_use *use, struct iv_cand *cand)
4888 {
4889   bitmap depends_on;
4890   bool can_autoinc;
4891   int inv_expr_id = -1;
4892   struct iv_use *sub_use;
4893   comp_cost sub_cost;
4894   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4895                                          &can_autoinc, &inv_expr_id);
4896
4897   if (cand->ainc_use == use)
4898     {
4899       if (can_autoinc)
4900         cost.cost -= cand->cost_step;
4901       /* If we generated the candidate solely for exploiting autoincrement
4902          opportunities, and it turns out it can't be used, set the cost to
4903          infinity to make sure we ignore it.  */
4904       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4905         cost = infinite_cost;
4906     }
4907   for (sub_use = use->next;
4908        sub_use && !infinite_cost_p (cost);
4909        sub_use = sub_use->next)
4910     {
4911        sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4912                                         &can_autoinc, &inv_expr_id);
4913        cost = add_costs (cost, sub_cost);
4914     }
4915
4916   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4917                    inv_expr_id);
4918
4919   return !infinite_cost_p (cost);
4920 }
4921
4922 /* Computes value of candidate CAND at position AT in iteration NITER, and
4923    stores it to VAL.  */
4924
4925 static void
4926 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4927                aff_tree *val)
4928 {
4929   aff_tree step, delta, nit;
4930   struct iv *iv = cand->iv;
4931   tree type = TREE_TYPE (iv->base);
4932   tree steptype = type;
4933   if (POINTER_TYPE_P (type))
4934     steptype = sizetype;
4935   steptype = unsigned_type_for (type);
4936
4937   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4938   aff_combination_convert (&step, steptype);
4939   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4940   aff_combination_convert (&nit, steptype);
4941   aff_combination_mult (&nit, &step, &delta);
4942   if (stmt_after_increment (loop, cand, at))
4943     aff_combination_add (&delta, &step);
4944
4945   tree_to_aff_combination (iv->base, type, val);
4946   if (!POINTER_TYPE_P (type))
4947     aff_combination_convert (val, steptype);
4948   aff_combination_add (val, &delta);
4949 }
4950
4951 /* Returns period of induction variable iv.  */
4952
4953 static tree
4954 iv_period (struct iv *iv)
4955 {
4956   tree step = iv->step, period, type;
4957   tree pow2div;
4958
4959   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4960
4961   type = unsigned_type_for (TREE_TYPE (step));
4962   /* Period of the iv is lcm (step, type_range)/step -1,
4963      i.e., N*type_range/step - 1. Since type range is power
4964      of two, N == (step >> num_of_ending_zeros_binary (step),
4965      so the final result is
4966
4967        (type_range >> num_of_ending_zeros_binary (step)) - 1
4968
4969   */
4970   pow2div = num_ending_zeros (step);
4971
4972   period = build_low_bits_mask (type,
4973                                 (TYPE_PRECISION (type)
4974                                  - tree_to_uhwi (pow2div)));
4975
4976   return period;
4977 }
4978
4979 /* Returns the comparison operator used when eliminating the iv USE.  */
4980
4981 static enum tree_code
4982 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4983 {
4984   struct loop *loop = data->current_loop;
4985   basic_block ex_bb;
4986   edge exit;
4987
4988   ex_bb = gimple_bb (use->stmt);
4989   exit = EDGE_SUCC (ex_bb, 0);
4990   if (flow_bb_inside_loop_p (loop, exit->dest))
4991     exit = EDGE_SUCC (ex_bb, 1);
4992
4993   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4994 }
4995
4996 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4997    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4998    calculation is performed in non-wrapping type.
4999
5000    TODO: More generally, we could test for the situation that
5001          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5002          This would require knowing the sign of OFFSET.  */
5003
5004 static bool
5005 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5006 {
5007   enum tree_code code;
5008   tree e1, e2;
5009   aff_tree aff_e1, aff_e2, aff_offset;
5010
5011   if (!nowrap_type_p (TREE_TYPE (base)))
5012     return false;
5013
5014   base = expand_simple_operations (base);
5015
5016   if (TREE_CODE (base) == SSA_NAME)
5017     {
5018       gimple *stmt = SSA_NAME_DEF_STMT (base);
5019
5020       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5021         return false;
5022
5023       code = gimple_assign_rhs_code (stmt);
5024       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5025         return false;
5026
5027       e1 = gimple_assign_rhs1 (stmt);
5028       e2 = gimple_assign_rhs2 (stmt);
5029     }
5030   else
5031     {
5032       code = TREE_CODE (base);
5033       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5034         return false;
5035       e1 = TREE_OPERAND (base, 0);
5036       e2 = TREE_OPERAND (base, 1);
5037     }
5038
5039   /* Use affine expansion as deeper inspection to prove the equality.  */
5040   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5041                                   &aff_e2, &data->name_expansion_cache);
5042   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5043                                   &aff_offset, &data->name_expansion_cache);
5044   aff_combination_scale (&aff_offset, -1);
5045   switch (code)
5046     {
5047     case PLUS_EXPR:
5048       aff_combination_add (&aff_e2, &aff_offset);
5049       if (aff_combination_zero_p (&aff_e2))
5050         return true;
5051
5052       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5053                                       &aff_e1, &data->name_expansion_cache);
5054       aff_combination_add (&aff_e1, &aff_offset);
5055       return aff_combination_zero_p (&aff_e1);
5056
5057     case POINTER_PLUS_EXPR:
5058       aff_combination_add (&aff_e2, &aff_offset);
5059       return aff_combination_zero_p (&aff_e2);
5060
5061     default:
5062       return false;
5063     }
5064 }
5065
5066 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5067    comparison with CAND.  NITER describes the number of iterations of
5068    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5069
5070    We aim to handle the following situation:
5071
5072    sometype *base, *p;
5073    int a, b, i;
5074
5075    i = a;
5076    p = p_0 = base + a;
5077
5078    do
5079      {
5080        bla (*p);
5081        p++;
5082        i++;
5083      }
5084    while (i < b);
5085
5086    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5087    We aim to optimize this to
5088
5089    p = p_0 = base + a;
5090    do
5091      {
5092        bla (*p);
5093        p++;
5094      }
5095    while (p < p_0 - a + b);
5096
5097    This preserves the correctness, since the pointer arithmetics does not
5098    overflow.  More precisely:
5099
5100    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5101       overflow in computing it or the values of p.
5102    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5103       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5104
5105 static bool
5106 iv_elimination_compare_lt (struct ivopts_data *data,
5107                            struct iv_cand *cand, enum tree_code *comp_p,
5108                            struct tree_niter_desc *niter)
5109 {
5110   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5111   struct aff_tree nit, tmpa, tmpb;
5112   enum tree_code comp;
5113   HOST_WIDE_INT step;
5114
5115   /* We need to know that the candidate induction variable does not overflow.
5116      While more complex analysis may be used to prove this, for now just
5117      check that the variable appears in the original program and that it
5118      is computed in a type that guarantees no overflows.  */
5119   cand_type = TREE_TYPE (cand->iv->base);
5120   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5121     return false;
5122
5123   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5124      the calculation of the BOUND could overflow, making the comparison
5125      invalid.  */
5126   if (!data->loop_single_exit_p)
5127     return false;
5128
5129   /* We need to be able to decide whether candidate is increasing or decreasing
5130      in order to choose the right comparison operator.  */
5131   if (!cst_and_fits_in_hwi (cand->iv->step))
5132     return false;
5133   step = int_cst_value (cand->iv->step);
5134
5135   /* Check that the number of iterations matches the expected pattern:
5136      a + 1 > b ? 0 : b - a - 1.  */
5137   mbz = niter->may_be_zero;
5138   if (TREE_CODE (mbz) == GT_EXPR)
5139     {
5140       /* Handle a + 1 > b.  */
5141       tree op0 = TREE_OPERAND (mbz, 0);
5142       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5143         {
5144           a = TREE_OPERAND (op0, 0);
5145           b = TREE_OPERAND (mbz, 1);
5146         }
5147       else
5148         return false;
5149     }
5150   else if (TREE_CODE (mbz) == LT_EXPR)
5151     {
5152       tree op1 = TREE_OPERAND (mbz, 1);
5153
5154       /* Handle b < a + 1.  */
5155       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5156         {
5157           a = TREE_OPERAND (op1, 0);
5158           b = TREE_OPERAND (mbz, 0);
5159         }
5160       else
5161         return false;
5162     }
5163   else
5164     return false;
5165
5166   /* Expected number of iterations is B - A - 1.  Check that it matches
5167      the actual number, i.e., that B - A - NITER = 1.  */
5168   tree_to_aff_combination (niter->niter, nit_type, &nit);
5169   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5170   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5171   aff_combination_scale (&nit, -1);
5172   aff_combination_scale (&tmpa, -1);
5173   aff_combination_add (&tmpb, &tmpa);
5174   aff_combination_add (&tmpb, &nit);
5175   if (tmpb.n != 0 || tmpb.offset != 1)
5176     return false;
5177
5178   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5179      overflow.  */
5180   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5181                         cand->iv->step,
5182                         fold_convert (TREE_TYPE (cand->iv->step), a));
5183   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5184     return false;
5185
5186   /* Determine the new comparison operator.  */
5187   comp = step < 0 ? GT_EXPR : LT_EXPR;
5188   if (*comp_p == NE_EXPR)
5189     *comp_p = comp;
5190   else if (*comp_p == EQ_EXPR)
5191     *comp_p = invert_tree_comparison (comp, false);
5192   else
5193     gcc_unreachable ();
5194
5195   return true;
5196 }
5197
5198 /* Check whether it is possible to express the condition in USE by comparison
5199    of candidate CAND.  If so, store the value compared with to BOUND, and the
5200    comparison operator to COMP.  */
5201
5202 static bool
5203 may_eliminate_iv (struct ivopts_data *data,
5204                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5205                   enum tree_code *comp)
5206 {
5207   basic_block ex_bb;
5208   edge exit;
5209   tree period;
5210   struct loop *loop = data->current_loop;
5211   aff_tree bnd;
5212   struct tree_niter_desc *desc = NULL;
5213
5214   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5215     return false;
5216
5217   /* For now works only for exits that dominate the loop latch.
5218      TODO: extend to other conditions inside loop body.  */
5219   ex_bb = gimple_bb (use->stmt);
5220   if (use->stmt != last_stmt (ex_bb)
5221       || gimple_code (use->stmt) != GIMPLE_COND
5222       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5223     return false;
5224
5225   exit = EDGE_SUCC (ex_bb, 0);
5226   if (flow_bb_inside_loop_p (loop, exit->dest))
5227     exit = EDGE_SUCC (ex_bb, 1);
5228   if (flow_bb_inside_loop_p (loop, exit->dest))
5229     return false;
5230
5231   desc = niter_for_exit (data, exit);
5232   if (!desc)
5233     return false;
5234
5235   /* Determine whether we can use the variable to test the exit condition.
5236      This is the case iff the period of the induction variable is greater
5237      than the number of iterations for which the exit condition is true.  */
5238   period = iv_period (cand->iv);
5239
5240   /* If the number of iterations is constant, compare against it directly.  */
5241   if (TREE_CODE (desc->niter) == INTEGER_CST)
5242     {
5243       /* See cand_value_at.  */
5244       if (stmt_after_increment (loop, cand, use->stmt))
5245         {
5246           if (!tree_int_cst_lt (desc->niter, period))
5247             return false;
5248         }
5249       else
5250         {
5251           if (tree_int_cst_lt (period, desc->niter))
5252             return false;
5253         }
5254     }
5255
5256   /* If not, and if this is the only possible exit of the loop, see whether
5257      we can get a conservative estimate on the number of iterations of the
5258      entire loop and compare against that instead.  */
5259   else
5260     {
5261       widest_int period_value, max_niter;
5262
5263       max_niter = desc->max;
5264       if (stmt_after_increment (loop, cand, use->stmt))
5265         max_niter += 1;
5266       period_value = wi::to_widest (period);
5267       if (wi::gtu_p (max_niter, period_value))
5268         {
5269           /* See if we can take advantage of inferred loop bound information.  */
5270           if (data->loop_single_exit_p)
5271             {
5272               if (!max_loop_iterations (loop, &max_niter))
5273                 return false;
5274               /* The loop bound is already adjusted by adding 1.  */
5275               if (wi::gtu_p (max_niter, period_value))
5276                 return false;
5277             }
5278           else
5279             return false;
5280         }
5281     }
5282
5283   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5284
5285   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5286                          aff_combination_to_tree (&bnd));
5287   *comp = iv_elimination_compare (data, use);
5288
5289   /* It is unlikely that computing the number of iterations using division
5290      would be more profitable than keeping the original induction variable.  */
5291   if (expression_expensive_p (*bound))
5292     return false;
5293
5294   /* Sometimes, it is possible to handle the situation that the number of
5295      iterations may be zero unless additional assumtions by using <
5296      instead of != in the exit condition.
5297
5298      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5299            base the exit condition on it.  However, that is often too
5300            expensive.  */
5301   if (!integer_zerop (desc->may_be_zero))
5302     return iv_elimination_compare_lt (data, cand, comp, desc);
5303
5304   return true;
5305 }
5306
5307  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5308     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5309
5310 static int
5311 parm_decl_cost (struct ivopts_data *data, tree bound)
5312 {
5313   tree sbound = bound;
5314   STRIP_NOPS (sbound);
5315
5316   if (TREE_CODE (sbound) == SSA_NAME
5317       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5318       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5319       && data->body_includes_call)
5320     return COSTS_N_INSNS (1);
5321
5322   return 0;
5323 }
5324
5325 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5326
5327 static bool
5328 determine_use_iv_cost_condition (struct ivopts_data *data,
5329                                  struct iv_use *use, struct iv_cand *cand)
5330 {
5331   tree bound = NULL_TREE;
5332   struct iv *cmp_iv;
5333   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5334   comp_cost elim_cost, express_cost, cost, bound_cost;
5335   bool ok;
5336   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5337   tree *control_var, *bound_cst;
5338   enum tree_code comp = ERROR_MARK;
5339
5340   /* Only consider real candidates.  */
5341   if (!cand->iv)
5342     {
5343       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5344                        ERROR_MARK, -1);
5345       return false;
5346     }
5347
5348   /* Try iv elimination.  */
5349   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5350     {
5351       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5352       if (elim_cost.cost == 0)
5353         elim_cost.cost = parm_decl_cost (data, bound);
5354       else if (TREE_CODE (bound) == INTEGER_CST)
5355         elim_cost.cost = 0;
5356       /* If we replace a loop condition 'i < n' with 'p < base + n',
5357          depends_on_elim will have 'base' and 'n' set, which implies
5358          that both 'base' and 'n' will be live during the loop.  More likely,
5359          'base + n' will be loop invariant, resulting in only one live value
5360          during the loop.  So in that case we clear depends_on_elim and set
5361         elim_inv_expr_id instead.  */
5362       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5363         {
5364           elim_inv_expr_id = get_expr_id (data, bound);
5365           bitmap_clear (depends_on_elim);
5366         }
5367       /* The bound is a loop invariant, so it will be only computed
5368          once.  */
5369       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5370     }
5371   else
5372     elim_cost = infinite_cost;
5373
5374   /* Try expressing the original giv.  If it is compared with an invariant,
5375      note that we cannot get rid of it.  */
5376   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5377                               NULL, &cmp_iv);
5378   gcc_assert (ok);
5379
5380   /* When the condition is a comparison of the candidate IV against
5381      zero, prefer this IV.
5382
5383      TODO: The constant that we're subtracting from the cost should
5384      be target-dependent.  This information should be added to the
5385      target costs for each backend.  */
5386   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5387       && integer_zerop (*bound_cst)
5388       && (operand_equal_p (*control_var, cand->var_after, 0)
5389           || operand_equal_p (*control_var, cand->var_before, 0)))
5390     elim_cost.cost -= 1;
5391
5392   express_cost = get_computation_cost (data, use, cand, false,
5393                                        &depends_on_express, NULL,
5394                                        &express_inv_expr_id);
5395   fd_ivopts_data = data;
5396   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5397
5398   /* Count the cost of the original bound as well.  */
5399   bound_cost = force_var_cost (data, *bound_cst, NULL);
5400   if (bound_cost.cost == 0)
5401     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5402   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5403     bound_cost.cost = 0;
5404   express_cost.cost += bound_cost.cost;
5405
5406   /* Choose the better approach, preferring the eliminated IV. */
5407   if (compare_costs (elim_cost, express_cost) <= 0)
5408     {
5409       cost = elim_cost;
5410       depends_on = depends_on_elim;
5411       depends_on_elim = NULL;
5412       inv_expr_id = elim_inv_expr_id;
5413     }
5414   else
5415     {
5416       cost = express_cost;
5417       depends_on = depends_on_express;
5418       depends_on_express = NULL;
5419       bound = NULL_TREE;
5420       comp = ERROR_MARK;
5421       inv_expr_id = express_inv_expr_id;
5422     }
5423
5424   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5425
5426   if (depends_on_elim)
5427     BITMAP_FREE (depends_on_elim);
5428   if (depends_on_express)
5429     BITMAP_FREE (depends_on_express);
5430
5431   return !infinite_cost_p (cost);
5432 }
5433
5434 /* Determines cost of basing replacement of USE on CAND.  Returns false
5435    if USE cannot be based on CAND.  */
5436
5437 static bool
5438 determine_use_iv_cost (struct ivopts_data *data,
5439                        struct iv_use *use, struct iv_cand *cand)
5440 {
5441   switch (use->type)
5442     {
5443     case USE_NONLINEAR_EXPR:
5444       return determine_use_iv_cost_generic (data, use, cand);
5445
5446     case USE_ADDRESS:
5447       return determine_use_iv_cost_address (data, use, cand);
5448
5449     case USE_COMPARE:
5450       return determine_use_iv_cost_condition (data, use, cand);
5451
5452     default:
5453       gcc_unreachable ();
5454     }
5455 }
5456
5457 /* Return true if get_computation_cost indicates that autoincrement is
5458    a possibility for the pair of USE and CAND, false otherwise.  */
5459
5460 static bool
5461 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5462                            struct iv_cand *cand)
5463 {
5464   bitmap depends_on;
5465   bool can_autoinc;
5466   comp_cost cost;
5467
5468   if (use->type != USE_ADDRESS)
5469     return false;
5470
5471   cost = get_computation_cost (data, use, cand, true, &depends_on,
5472                                &can_autoinc, NULL);
5473
5474   BITMAP_FREE (depends_on);
5475
5476   return !infinite_cost_p (cost) && can_autoinc;
5477 }
5478
5479 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5480    use that allows autoincrement, and set their AINC_USE if possible.  */
5481
5482 static void
5483 set_autoinc_for_original_candidates (struct ivopts_data *data)
5484 {
5485   unsigned i, j;
5486
5487   for (i = 0; i < n_iv_cands (data); i++)
5488     {
5489       struct iv_cand *cand = iv_cand (data, i);
5490       struct iv_use *closest_before = NULL;
5491       struct iv_use *closest_after = NULL;
5492       if (cand->pos != IP_ORIGINAL)
5493         continue;
5494
5495       for (j = 0; j < n_iv_uses (data); j++)
5496         {
5497           struct iv_use *use = iv_use (data, j);
5498           unsigned uid = gimple_uid (use->stmt);
5499
5500           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5501             continue;
5502
5503           if (uid < gimple_uid (cand->incremented_at)
5504               && (closest_before == NULL
5505                   || uid > gimple_uid (closest_before->stmt)))
5506             closest_before = use;
5507
5508           if (uid > gimple_uid (cand->incremented_at)
5509               && (closest_after == NULL
5510                   || uid < gimple_uid (closest_after->stmt)))
5511             closest_after = use;
5512         }
5513
5514       if (closest_before != NULL
5515           && autoinc_possible_for_pair (data, closest_before, cand))
5516         cand->ainc_use = closest_before;
5517       else if (closest_after != NULL
5518                && autoinc_possible_for_pair (data, closest_after, cand))
5519         cand->ainc_use = closest_after;
5520     }
5521 }
5522
5523 /* Finds the candidates for the induction variables.  */
5524
5525 static void
5526 find_iv_candidates (struct ivopts_data *data)
5527 {
5528   /* Add commonly used ivs.  */
5529   add_standard_iv_candidates (data);
5530
5531   /* Add old induction variables.  */
5532   add_iv_candidate_for_bivs (data);
5533
5534   /* Add induction variables derived from uses.  */
5535   add_iv_candidate_for_uses (data);
5536
5537   set_autoinc_for_original_candidates (data);
5538
5539   /* Record the important candidates.  */
5540   record_important_candidates (data);
5541 }
5542
5543 /* Determines costs of basing the use of the iv on an iv candidate.  */
5544
5545 static void
5546 determine_use_iv_costs (struct ivopts_data *data)
5547 {
5548   unsigned i, j;
5549   struct iv_use *use;
5550   struct iv_cand *cand;
5551   bitmap to_clear = BITMAP_ALLOC (NULL);
5552
5553   alloc_use_cost_map (data);
5554
5555   for (i = 0; i < n_iv_uses (data); i++)
5556     {
5557       use = iv_use (data, i);
5558
5559       if (data->consider_all_candidates)
5560         {
5561           for (j = 0; j < n_iv_cands (data); j++)
5562             {
5563               cand = iv_cand (data, j);
5564               determine_use_iv_cost (data, use, cand);
5565             }
5566         }
5567       else
5568         {
5569           bitmap_iterator bi;
5570
5571           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5572             {
5573               cand = iv_cand (data, j);
5574               if (!determine_use_iv_cost (data, use, cand))
5575                 bitmap_set_bit (to_clear, j);
5576             }
5577
5578           /* Remove the candidates for that the cost is infinite from
5579              the list of related candidates.  */
5580           bitmap_and_compl_into (use->related_cands, to_clear);
5581           bitmap_clear (to_clear);
5582         }
5583     }
5584
5585   BITMAP_FREE (to_clear);
5586
5587   if (dump_file && (dump_flags & TDF_DETAILS))
5588     {
5589       fprintf (dump_file, "Use-candidate costs:\n");
5590
5591       for (i = 0; i < n_iv_uses (data); i++)
5592         {
5593           use = iv_use (data, i);
5594
5595           fprintf (dump_file, "Use %d:\n", i);
5596           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5597           for (j = 0; j < use->n_map_members; j++)
5598             {
5599               if (!use->cost_map[j].cand
5600                   || infinite_cost_p (use->cost_map[j].cost))
5601                 continue;
5602
5603               fprintf (dump_file, "  %d\t%d\t%d\t",
5604                        use->cost_map[j].cand->id,
5605                        use->cost_map[j].cost.cost,
5606                        use->cost_map[j].cost.complexity);
5607               if (use->cost_map[j].depends_on)
5608                 bitmap_print (dump_file,
5609                               use->cost_map[j].depends_on, "","");
5610               if (use->cost_map[j].inv_expr_id != -1)
5611                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5612               fprintf (dump_file, "\n");
5613             }
5614
5615           fprintf (dump_file, "\n");
5616         }
5617       fprintf (dump_file, "\n");
5618     }
5619 }
5620
5621 /* Determines cost of the candidate CAND.  */
5622
5623 static void
5624 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5625 {
5626   comp_cost cost_base;
5627   unsigned cost, cost_step;
5628   tree base;
5629
5630   if (!cand->iv)
5631     {
5632       cand->cost = 0;
5633       return;
5634     }
5635
5636   /* There are two costs associated with the candidate -- its increment
5637      and its initialization.  The second is almost negligible for any loop
5638      that rolls enough, so we take it just very little into account.  */
5639
5640   base = cand->iv->base;
5641   cost_base = force_var_cost (data, base, NULL);
5642   /* It will be exceptional that the iv register happens to be initialized with
5643      the proper value at no cost.  In general, there will at least be a regcopy
5644      or a const set.  */
5645   if (cost_base.cost == 0)
5646     cost_base.cost = COSTS_N_INSNS (1);
5647   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5648
5649   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5650
5651   /* Prefer the original ivs unless we may gain something by replacing it.
5652      The reason is to make debugging simpler; so this is not relevant for
5653      artificial ivs created by other optimization passes.  */
5654   if (cand->pos != IP_ORIGINAL
5655       || !SSA_NAME_VAR (cand->var_before)
5656       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5657     cost++;
5658
5659   /* Prefer not to insert statements into latch unless there are some
5660      already (so that we do not create unnecessary jumps).  */
5661   if (cand->pos == IP_END
5662       && empty_block_p (ip_end_pos (data->current_loop)))
5663     cost++;
5664
5665   cand->cost = cost;
5666   cand->cost_step = cost_step;
5667 }
5668
5669 /* Determines costs of computation of the candidates.  */
5670
5671 static void
5672 determine_iv_costs (struct ivopts_data *data)
5673 {
5674   unsigned i;
5675
5676   if (dump_file && (dump_flags & TDF_DETAILS))
5677     {
5678       fprintf (dump_file, "Candidate costs:\n");
5679       fprintf (dump_file, "  cand\tcost\n");
5680     }
5681
5682   for (i = 0; i < n_iv_cands (data); i++)
5683     {
5684       struct iv_cand *cand = iv_cand (data, i);
5685
5686       determine_iv_cost (data, cand);
5687
5688       if (dump_file && (dump_flags & TDF_DETAILS))
5689         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5690     }
5691
5692   if (dump_file && (dump_flags & TDF_DETAILS))
5693     fprintf (dump_file, "\n");
5694 }
5695
5696 /* Calculates cost for having SIZE induction variables.  */
5697
5698 static unsigned
5699 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5700 {
5701   /* We add size to the cost, so that we prefer eliminating ivs
5702      if possible.  */
5703   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5704                                             data->body_includes_call);
5705 }
5706
5707 /* For each size of the induction variable set determine the penalty.  */
5708
5709 static void
5710 determine_set_costs (struct ivopts_data *data)
5711 {
5712   unsigned j, n;
5713   gphi *phi;
5714   gphi_iterator psi;
5715   tree op;
5716   struct loop *loop = data->current_loop;
5717   bitmap_iterator bi;
5718
5719   if (dump_file && (dump_flags & TDF_DETAILS))
5720     {
5721       fprintf (dump_file, "Global costs:\n");
5722       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5723       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5724       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5725       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5726     }
5727
5728   n = 0;
5729   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5730     {
5731       phi = psi.phi ();
5732       op = PHI_RESULT (phi);
5733
5734       if (virtual_operand_p (op))
5735         continue;
5736
5737       if (get_iv (data, op))
5738         continue;
5739
5740       n++;
5741     }
5742
5743   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5744     {
5745       struct version_info *info = ver_info (data, j);
5746
5747       if (info->inv_id && info->has_nonlin_use)
5748         n++;
5749     }
5750
5751   data->regs_used = n;
5752   if (dump_file && (dump_flags & TDF_DETAILS))
5753     fprintf (dump_file, "  regs_used %d\n", n);
5754
5755   if (dump_file && (dump_flags & TDF_DETAILS))
5756     {
5757       fprintf (dump_file, "  cost for size:\n");
5758       fprintf (dump_file, "  ivs\tcost\n");
5759       for (j = 0; j <= 2 * target_avail_regs; j++)
5760         fprintf (dump_file, "  %d\t%d\n", j,
5761                  ivopts_global_cost_for_size (data, j));
5762       fprintf (dump_file, "\n");
5763     }
5764 }
5765
5766 /* Returns true if A is a cheaper cost pair than B.  */
5767
5768 static bool
5769 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5770 {
5771   int cmp;
5772
5773   if (!a)
5774     return false;
5775
5776   if (!b)
5777     return true;
5778
5779   cmp = compare_costs (a->cost, b->cost);
5780   if (cmp < 0)
5781     return true;
5782
5783   if (cmp > 0)
5784     return false;
5785
5786   /* In case the costs are the same, prefer the cheaper candidate.  */
5787   if (a->cand->cost < b->cand->cost)
5788     return true;
5789
5790   return false;
5791 }
5792
5793
5794 /* Returns candidate by that USE is expressed in IVS.  */
5795
5796 static struct cost_pair *
5797 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5798 {
5799   return ivs->cand_for_use[use->id];
5800 }
5801
5802 /* Computes the cost field of IVS structure.  */
5803
5804 static void
5805 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5806 {
5807   comp_cost cost = ivs->cand_use_cost;
5808
5809   cost.cost += ivs->cand_cost;
5810
5811   cost.cost += ivopts_global_cost_for_size (data,
5812                                             ivs->n_regs + ivs->num_used_inv_expr);
5813
5814   ivs->cost = cost;
5815 }
5816
5817 /* Remove invariants in set INVS to set IVS.  */
5818
5819 static void
5820 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5821 {
5822   bitmap_iterator bi;
5823   unsigned iid;
5824
5825   if (!invs)
5826     return;
5827
5828   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5829     {
5830       ivs->n_invariant_uses[iid]--;
5831       if (ivs->n_invariant_uses[iid] == 0)
5832         ivs->n_regs--;
5833     }
5834 }
5835
5836 /* Set USE not to be expressed by any candidate in IVS.  */
5837
5838 static void
5839 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5840                  struct iv_use *use)
5841 {
5842   unsigned uid = use->id, cid;
5843   struct cost_pair *cp;
5844
5845   cp = ivs->cand_for_use[uid];
5846   if (!cp)
5847     return;
5848   cid = cp->cand->id;
5849
5850   ivs->bad_uses++;
5851   ivs->cand_for_use[uid] = NULL;
5852   ivs->n_cand_uses[cid]--;
5853
5854   if (ivs->n_cand_uses[cid] == 0)
5855     {
5856       bitmap_clear_bit (ivs->cands, cid);
5857       /* Do not count the pseudocandidates.  */
5858       if (cp->cand->iv)
5859         ivs->n_regs--;
5860       ivs->n_cands--;
5861       ivs->cand_cost -= cp->cand->cost;
5862
5863       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5864     }
5865
5866   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5867
5868   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5869
5870   if (cp->inv_expr_id != -1)
5871     {
5872       ivs->used_inv_expr[cp->inv_expr_id]--;
5873       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5874         ivs->num_used_inv_expr--;
5875     }
5876   iv_ca_recount_cost (data, ivs);
5877 }
5878
5879 /* Add invariants in set INVS to set IVS.  */
5880
5881 static void
5882 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5883 {
5884   bitmap_iterator bi;
5885   unsigned iid;
5886
5887   if (!invs)
5888     return;
5889
5890   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5891     {
5892       ivs->n_invariant_uses[iid]++;
5893       if (ivs->n_invariant_uses[iid] == 1)
5894         ivs->n_regs++;
5895     }
5896 }
5897
5898 /* Set cost pair for USE in set IVS to CP.  */
5899
5900 static void
5901 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5902               struct iv_use *use, struct cost_pair *cp)
5903 {
5904   unsigned uid = use->id, cid;
5905
5906   if (ivs->cand_for_use[uid] == cp)
5907     return;
5908
5909   if (ivs->cand_for_use[uid])
5910     iv_ca_set_no_cp (data, ivs, use);
5911
5912   if (cp)
5913     {
5914       cid = cp->cand->id;
5915
5916       ivs->bad_uses--;
5917       ivs->cand_for_use[uid] = cp;
5918       ivs->n_cand_uses[cid]++;
5919       if (ivs->n_cand_uses[cid] == 1)
5920         {
5921           bitmap_set_bit (ivs->cands, cid);
5922           /* Do not count the pseudocandidates.  */
5923           if (cp->cand->iv)
5924             ivs->n_regs++;
5925           ivs->n_cands++;
5926           ivs->cand_cost += cp->cand->cost;
5927
5928           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5929         }
5930
5931       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5932       iv_ca_set_add_invariants (ivs, cp->depends_on);
5933
5934       if (cp->inv_expr_id != -1)
5935         {
5936           ivs->used_inv_expr[cp->inv_expr_id]++;
5937           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5938             ivs->num_used_inv_expr++;
5939         }
5940       iv_ca_recount_cost (data, ivs);
5941     }
5942 }
5943
5944 /* Extend set IVS by expressing USE by some of the candidates in it
5945    if possible.  Consider all important candidates if candidates in
5946    set IVS don't give any result.  */
5947
5948 static void
5949 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5950                struct iv_use *use)
5951 {
5952   struct cost_pair *best_cp = NULL, *cp;
5953   bitmap_iterator bi;
5954   unsigned i;
5955   struct iv_cand *cand;
5956
5957   gcc_assert (ivs->upto >= use->id);
5958   ivs->upto++;
5959   ivs->bad_uses++;
5960
5961   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5962     {
5963       cand = iv_cand (data, i);
5964       cp = get_use_iv_cost (data, use, cand);
5965       if (cheaper_cost_pair (cp, best_cp))
5966         best_cp = cp;
5967     }
5968
5969   if (best_cp == NULL)
5970     {
5971       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5972         {
5973           cand = iv_cand (data, i);
5974           cp = get_use_iv_cost (data, use, cand);
5975           if (cheaper_cost_pair (cp, best_cp))
5976             best_cp = cp;
5977         }
5978     }
5979
5980   iv_ca_set_cp (data, ivs, use, best_cp);
5981 }
5982
5983 /* Get cost for assignment IVS.  */
5984
5985 static comp_cost
5986 iv_ca_cost (struct iv_ca *ivs)
5987 {
5988   /* This was a conditional expression but it triggered a bug in
5989      Sun C 5.5.  */
5990   if (ivs->bad_uses)
5991     return infinite_cost;
5992   else
5993     return ivs->cost;
5994 }
5995
5996 /* Returns true if all dependences of CP are among invariants in IVS.  */
5997
5998 static bool
5999 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6000 {
6001   unsigned i;
6002   bitmap_iterator bi;
6003
6004   if (!cp->depends_on)
6005     return true;
6006
6007   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6008     {
6009       if (ivs->n_invariant_uses[i] == 0)
6010         return false;
6011     }
6012
6013   return true;
6014 }
6015
6016 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6017    it before NEXT_CHANGE.  */
6018
6019 static struct iv_ca_delta *
6020 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6021                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6022 {
6023   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6024
6025   change->use = use;
6026   change->old_cp = old_cp;
6027   change->new_cp = new_cp;
6028   change->next_change = next_change;
6029
6030   return change;
6031 }
6032
6033 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6034    are rewritten.  */
6035
6036 static struct iv_ca_delta *
6037 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6038 {
6039   struct iv_ca_delta *last;
6040
6041   if (!l2)
6042     return l1;
6043
6044   if (!l1)
6045     return l2;
6046
6047   for (last = l1; last->next_change; last = last->next_change)
6048     continue;
6049   last->next_change = l2;
6050
6051   return l1;
6052 }
6053
6054 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6055
6056 static struct iv_ca_delta *
6057 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6058 {
6059   struct iv_ca_delta *act, *next, *prev = NULL;
6060
6061   for (act = delta; act; act = next)
6062     {
6063       next = act->next_change;
6064       act->next_change = prev;
6065       prev = act;
6066
6067       std::swap (act->old_cp, act->new_cp);
6068     }
6069
6070   return prev;
6071 }
6072
6073 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6074    reverted instead.  */
6075
6076 static void
6077 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6078                     struct iv_ca_delta *delta, bool forward)
6079 {
6080   struct cost_pair *from, *to;
6081   struct iv_ca_delta *act;
6082
6083   if (!forward)
6084     delta = iv_ca_delta_reverse (delta);
6085
6086   for (act = delta; act; act = act->next_change)
6087     {
6088       from = act->old_cp;
6089       to = act->new_cp;
6090       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6091       iv_ca_set_cp (data, ivs, act->use, to);
6092     }
6093
6094   if (!forward)
6095     iv_ca_delta_reverse (delta);
6096 }
6097
6098 /* Returns true if CAND is used in IVS.  */
6099
6100 static bool
6101 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6102 {
6103   return ivs->n_cand_uses[cand->id] > 0;
6104 }
6105
6106 /* Returns number of induction variable candidates in the set IVS.  */
6107
6108 static unsigned
6109 iv_ca_n_cands (struct iv_ca *ivs)
6110 {
6111   return ivs->n_cands;
6112 }
6113
6114 /* Free the list of changes DELTA.  */
6115
6116 static void
6117 iv_ca_delta_free (struct iv_ca_delta **delta)
6118 {
6119   struct iv_ca_delta *act, *next;
6120
6121   for (act = *delta; act; act = next)
6122     {
6123       next = act->next_change;
6124       free (act);
6125     }
6126
6127   *delta = NULL;
6128 }
6129
6130 /* Allocates new iv candidates assignment.  */
6131
6132 static struct iv_ca *
6133 iv_ca_new (struct ivopts_data *data)
6134 {
6135   struct iv_ca *nw = XNEW (struct iv_ca);
6136
6137   nw->upto = 0;
6138   nw->bad_uses = 0;
6139   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6140   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6141   nw->cands = BITMAP_ALLOC (NULL);
6142   nw->n_cands = 0;
6143   nw->n_regs = 0;
6144   nw->cand_use_cost = no_cost;
6145   nw->cand_cost = 0;
6146   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6147   nw->cost = no_cost;
6148   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6149   nw->num_used_inv_expr = 0;
6150
6151   return nw;
6152 }
6153
6154 /* Free memory occupied by the set IVS.  */
6155
6156 static void
6157 iv_ca_free (struct iv_ca **ivs)
6158 {
6159   free ((*ivs)->cand_for_use);
6160   free ((*ivs)->n_cand_uses);
6161   BITMAP_FREE ((*ivs)->cands);
6162   free ((*ivs)->n_invariant_uses);
6163   free ((*ivs)->used_inv_expr);
6164   free (*ivs);
6165   *ivs = NULL;
6166 }
6167
6168 /* Dumps IVS to FILE.  */
6169
6170 static void
6171 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6172 {
6173   const char *pref = "  invariants ";
6174   unsigned i;
6175   comp_cost cost = iv_ca_cost (ivs);
6176
6177   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6178   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
6179            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6180   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6181
6182    for (i = 0; i < ivs->upto; i++)
6183     {
6184       struct iv_use *use = iv_use (data, i);
6185       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6186       if (cp)
6187         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6188                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6189       else
6190         fprintf (file, "   use:%d --> ??\n", use->id);
6191     }
6192
6193   for (i = 1; i <= data->max_inv_id; i++)
6194     if (ivs->n_invariant_uses[i])
6195       {
6196         fprintf (file, "%s%d", pref, i);
6197         pref = ", ";
6198       }
6199   fprintf (file, "\n\n");
6200 }
6201
6202 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6203    new set, and store differences in DELTA.  Number of induction variables
6204    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6205    the function will try to find a solution with mimimal iv candidates.  */
6206
6207 static comp_cost
6208 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6209               struct iv_cand *cand, struct iv_ca_delta **delta,
6210               unsigned *n_ivs, bool min_ncand)
6211 {
6212   unsigned i;
6213   comp_cost cost;
6214   struct iv_use *use;
6215   struct cost_pair *old_cp, *new_cp;
6216
6217   *delta = NULL;
6218   for (i = 0; i < ivs->upto; i++)
6219     {
6220       use = iv_use (data, i);
6221       old_cp = iv_ca_cand_for_use (ivs, use);
6222
6223       if (old_cp
6224           && old_cp->cand == cand)
6225         continue;
6226
6227       new_cp = get_use_iv_cost (data, use, cand);
6228       if (!new_cp)
6229         continue;
6230
6231       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6232         continue;
6233
6234       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6235         continue;
6236
6237       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6238     }
6239
6240   iv_ca_delta_commit (data, ivs, *delta, true);
6241   cost = iv_ca_cost (ivs);
6242   if (n_ivs)
6243     *n_ivs = iv_ca_n_cands (ivs);
6244   iv_ca_delta_commit (data, ivs, *delta, false);
6245
6246   return cost;
6247 }
6248
6249 /* Try narrowing set IVS by removing CAND.  Return the cost of
6250    the new set and store the differences in DELTA.  START is
6251    the candidate with which we start narrowing.  */
6252
6253 static comp_cost
6254 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6255               struct iv_cand *cand, struct iv_cand *start,
6256               struct iv_ca_delta **delta)
6257 {
6258   unsigned i, ci;
6259   struct iv_use *use;
6260   struct cost_pair *old_cp, *new_cp, *cp;
6261   bitmap_iterator bi;
6262   struct iv_cand *cnd;
6263   comp_cost cost, best_cost, acost;
6264
6265   *delta = NULL;
6266   for (i = 0; i < n_iv_uses (data); i++)
6267     {
6268       use = iv_use (data, i);
6269
6270       old_cp = iv_ca_cand_for_use (ivs, use);
6271       if (old_cp->cand != cand)
6272         continue;
6273
6274       best_cost = iv_ca_cost (ivs);
6275       /* Start narrowing with START.  */
6276       new_cp = get_use_iv_cost (data, use, start);
6277
6278       if (data->consider_all_candidates)
6279         {
6280           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6281             {
6282               if (ci == cand->id || (start && ci == start->id))
6283                 continue;
6284
6285               cnd = iv_cand (data, ci);
6286
6287               cp = get_use_iv_cost (data, use, cnd);
6288               if (!cp)
6289                 continue;
6290
6291               iv_ca_set_cp (data, ivs, use, cp);
6292               acost = iv_ca_cost (ivs);
6293
6294               if (compare_costs (acost, best_cost) < 0)
6295                 {
6296                   best_cost = acost;
6297                   new_cp = cp;
6298                 }
6299             }
6300         }
6301       else
6302         {
6303           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6304             {
6305               if (ci == cand->id || (start && ci == start->id))
6306                 continue;
6307
6308               cnd = iv_cand (data, ci);
6309
6310               cp = get_use_iv_cost (data, use, cnd);
6311               if (!cp)
6312                 continue;
6313
6314               iv_ca_set_cp (data, ivs, use, cp);
6315               acost = iv_ca_cost (ivs);
6316
6317               if (compare_costs (acost, best_cost) < 0)
6318                 {
6319                   best_cost = acost;
6320                   new_cp = cp;
6321                 }
6322             }
6323         }
6324       /* Restore to old cp for use.  */
6325       iv_ca_set_cp (data, ivs, use, old_cp);
6326
6327       if (!new_cp)
6328         {
6329           iv_ca_delta_free (delta);
6330           return infinite_cost;
6331         }
6332
6333       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6334     }
6335
6336   iv_ca_delta_commit (data, ivs, *delta, true);
6337   cost = iv_ca_cost (ivs);
6338   iv_ca_delta_commit (data, ivs, *delta, false);
6339
6340   return cost;
6341 }
6342
6343 /* Try optimizing the set of candidates IVS by removing candidates different
6344    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6345    differences in DELTA.  */
6346
6347 static comp_cost
6348 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6349              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6350 {
6351   bitmap_iterator bi;
6352   struct iv_ca_delta *act_delta, *best_delta;
6353   unsigned i;
6354   comp_cost best_cost, acost;
6355   struct iv_cand *cand;
6356
6357   best_delta = NULL;
6358   best_cost = iv_ca_cost (ivs);
6359
6360   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6361     {
6362       cand = iv_cand (data, i);
6363
6364       if (cand == except_cand)
6365         continue;
6366
6367       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6368
6369       if (compare_costs (acost, best_cost) < 0)
6370         {
6371           best_cost = acost;
6372           iv_ca_delta_free (&best_delta);
6373           best_delta = act_delta;
6374         }
6375       else
6376         iv_ca_delta_free (&act_delta);
6377     }
6378
6379   if (!best_delta)
6380     {
6381       *delta = NULL;
6382       return best_cost;
6383     }
6384
6385   /* Recurse to possibly remove other unnecessary ivs.  */
6386   iv_ca_delta_commit (data, ivs, best_delta, true);
6387   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6388   iv_ca_delta_commit (data, ivs, best_delta, false);
6389   *delta = iv_ca_delta_join (best_delta, *delta);
6390   return best_cost;
6391 }
6392
6393 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6394    cheaper local cost for USE than BEST_CP.  Return pointer to
6395    the corresponding cost_pair, otherwise just return BEST_CP.  */
6396
6397 static struct cost_pair*
6398 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6399                         unsigned int cand_idx, struct iv_cand *old_cand,
6400                         struct cost_pair *best_cp)
6401 {
6402   struct iv_cand *cand;
6403   struct cost_pair *cp;
6404
6405   gcc_assert (old_cand != NULL && best_cp != NULL);
6406   if (cand_idx == old_cand->id)
6407     return best_cp;
6408
6409   cand = iv_cand (data, cand_idx);
6410   cp = get_use_iv_cost (data, use, cand);
6411   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6412     return cp;
6413
6414   return best_cp;
6415 }
6416
6417 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6418    which are used by more than one iv uses.  For each of those candidates,
6419    this function tries to represent iv uses under that candidate using
6420    other ones with lower local cost, then tries to prune the new set.
6421    If the new set has lower cost, It returns the new cost after recording
6422    candidate replacement in list DELTA.  */
6423
6424 static comp_cost
6425 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6426                struct iv_ca_delta **delta)
6427 {
6428   bitmap_iterator bi, bj;
6429   unsigned int i, j, k;
6430   struct iv_use *use;
6431   struct iv_cand *cand;
6432   comp_cost orig_cost, acost;
6433   struct iv_ca_delta *act_delta, *tmp_delta;
6434   struct cost_pair *old_cp, *best_cp = NULL;
6435
6436   *delta = NULL;
6437   orig_cost = iv_ca_cost (ivs);
6438
6439   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6440     {
6441       if (ivs->n_cand_uses[i] == 1
6442           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6443         continue;
6444
6445       cand = iv_cand (data, i);
6446
6447       act_delta = NULL;
6448       /*  Represent uses under current candidate using other ones with
6449           lower local cost.  */
6450       for (j = 0; j < ivs->upto; j++)
6451         {
6452           use = iv_use (data, j);
6453           old_cp = iv_ca_cand_for_use (ivs, use);
6454
6455           if (old_cp->cand != cand)
6456             continue;
6457
6458           best_cp = old_cp;
6459           if (data->consider_all_candidates)
6460             for (k = 0; k < n_iv_cands (data); k++)
6461               best_cp = cheaper_cost_with_cand (data, use, k,
6462                                                 old_cp->cand, best_cp);
6463           else
6464             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6465               best_cp = cheaper_cost_with_cand (data, use, k,
6466                                                 old_cp->cand, best_cp);
6467
6468           if (best_cp == old_cp)
6469             continue;
6470
6471           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6472         }
6473       /* No need for further prune.  */
6474       if (!act_delta)
6475         continue;
6476
6477       /* Prune the new candidate set.  */
6478       iv_ca_delta_commit (data, ivs, act_delta, true);
6479       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6480       iv_ca_delta_commit (data, ivs, act_delta, false);
6481       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6482
6483       if (compare_costs (acost, orig_cost) < 0)
6484         {
6485           *delta = act_delta;
6486           return acost;
6487         }
6488       else
6489         iv_ca_delta_free (&act_delta);
6490     }
6491
6492   return orig_cost;
6493 }
6494
6495 /* Tries to extend the sets IVS in the best possible way in order
6496    to express the USE.  If ORIGINALP is true, prefer candidates from
6497    the original set of IVs, otherwise favor important candidates not
6498    based on any memory object.  */
6499
6500 static bool
6501 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6502                   struct iv_use *use, bool originalp)
6503 {
6504   comp_cost best_cost, act_cost;
6505   unsigned i;
6506   bitmap_iterator bi;
6507   struct iv_cand *cand;
6508   struct iv_ca_delta *best_delta = NULL, *act_delta;
6509   struct cost_pair *cp;
6510
6511   iv_ca_add_use (data, ivs, use);
6512   best_cost = iv_ca_cost (ivs);
6513   cp = iv_ca_cand_for_use (ivs, use);
6514   if (cp)
6515     {
6516       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6517       iv_ca_set_no_cp (data, ivs, use);
6518     }
6519
6520   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6521      first try important candidates not based on any memory object.  Only if
6522      this fails, try the specific ones.  Rationale -- in loops with many
6523      variables the best choice often is to use just one generic biv.  If we
6524      added here many ivs specific to the uses, the optimization algorithm later
6525      would be likely to get stuck in a local minimum, thus causing us to create
6526      too many ivs.  The approach from few ivs to more seems more likely to be
6527      successful -- starting from few ivs, replacing an expensive use by a
6528      specific iv should always be a win.  */
6529   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6530     {
6531       cand = iv_cand (data, i);
6532
6533       if (originalp && cand->pos !=IP_ORIGINAL)
6534         continue;
6535
6536       if (!originalp && cand->iv->base_object != NULL_TREE)
6537         continue;
6538
6539       if (iv_ca_cand_used_p (ivs, cand))
6540         continue;
6541
6542       cp = get_use_iv_cost (data, use, cand);
6543       if (!cp)
6544         continue;
6545
6546       iv_ca_set_cp (data, ivs, use, cp);
6547       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6548                                true);
6549       iv_ca_set_no_cp (data, ivs, use);
6550       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6551
6552       if (compare_costs (act_cost, best_cost) < 0)
6553         {
6554           best_cost = act_cost;
6555
6556           iv_ca_delta_free (&best_delta);
6557           best_delta = act_delta;
6558         }
6559       else
6560         iv_ca_delta_free (&act_delta);
6561     }
6562
6563   if (infinite_cost_p (best_cost))
6564     {
6565       for (i = 0; i < use->n_map_members; i++)
6566         {
6567           cp = use->cost_map + i;
6568           cand = cp->cand;
6569           if (!cand)
6570             continue;
6571
6572           /* Already tried this.  */
6573           if (cand->important)
6574             {
6575               if (originalp && cand->pos == IP_ORIGINAL)
6576                 continue;
6577               if (!originalp && cand->iv->base_object == NULL_TREE)
6578                 continue;
6579             }
6580
6581           if (iv_ca_cand_used_p (ivs, cand))
6582             continue;
6583
6584           act_delta = NULL;
6585           iv_ca_set_cp (data, ivs, use, cp);
6586           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6587           iv_ca_set_no_cp (data, ivs, use);
6588           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6589                                        cp, act_delta);
6590
6591           if (compare_costs (act_cost, best_cost) < 0)
6592             {
6593               best_cost = act_cost;
6594
6595               if (best_delta)
6596                 iv_ca_delta_free (&best_delta);
6597               best_delta = act_delta;
6598             }
6599           else
6600             iv_ca_delta_free (&act_delta);
6601         }
6602     }
6603
6604   iv_ca_delta_commit (data, ivs, best_delta, true);
6605   iv_ca_delta_free (&best_delta);
6606
6607   return !infinite_cost_p (best_cost);
6608 }
6609
6610 /* Finds an initial assignment of candidates to uses.  */
6611
6612 static struct iv_ca *
6613 get_initial_solution (struct ivopts_data *data, bool originalp)
6614 {
6615   struct iv_ca *ivs = iv_ca_new (data);
6616   unsigned i;
6617
6618   for (i = 0; i < n_iv_uses (data); i++)
6619     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6620       {
6621         iv_ca_free (&ivs);
6622         return NULL;
6623       }
6624
6625   return ivs;
6626 }
6627
6628 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6629    points to a bool variable, this function tries to break local
6630    optimal fixed-point by replacing candidates in IVS if it's true.  */
6631
6632 static bool
6633 try_improve_iv_set (struct ivopts_data *data,
6634                     struct iv_ca *ivs, bool *try_replace_p)
6635 {
6636   unsigned i, n_ivs;
6637   comp_cost acost, best_cost = iv_ca_cost (ivs);
6638   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6639   struct iv_cand *cand;
6640
6641   /* Try extending the set of induction variables by one.  */
6642   for (i = 0; i < n_iv_cands (data); i++)
6643     {
6644       cand = iv_cand (data, i);
6645
6646       if (iv_ca_cand_used_p (ivs, cand))
6647         continue;
6648
6649       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6650       if (!act_delta)
6651         continue;
6652
6653       /* If we successfully added the candidate and the set is small enough,
6654          try optimizing it by removing other candidates.  */
6655       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6656         {
6657           iv_ca_delta_commit (data, ivs, act_delta, true);
6658           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6659           iv_ca_delta_commit (data, ivs, act_delta, false);
6660           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6661         }
6662
6663       if (compare_costs (acost, best_cost) < 0)
6664         {
6665           best_cost = acost;
6666           iv_ca_delta_free (&best_delta);
6667           best_delta = act_delta;
6668         }
6669       else
6670         iv_ca_delta_free (&act_delta);
6671     }
6672
6673   if (!best_delta)
6674     {
6675       /* Try removing the candidates from the set instead.  */
6676       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6677
6678       if (!best_delta && *try_replace_p)
6679         {
6680           *try_replace_p = false;
6681           /* So far candidate selecting algorithm tends to choose fewer IVs
6682              so that it can handle cases in which loops have many variables
6683              but the best choice is often to use only one general biv.  One
6684              weakness is it can't handle opposite cases, in which different
6685              candidates should be chosen with respect to each use.  To solve
6686              the problem, we replace candidates in a manner described by the
6687              comments of iv_ca_replace, thus give general algorithm a chance
6688              to break local optimal fixed-point in these cases.  */
6689           best_cost = iv_ca_replace (data, ivs, &best_delta);
6690         }
6691
6692       if (!best_delta)
6693         return false;
6694     }
6695
6696   iv_ca_delta_commit (data, ivs, best_delta, true);
6697   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6698   iv_ca_delta_free (&best_delta);
6699   return true;
6700 }
6701
6702 /* Attempts to find the optimal set of induction variables.  We do simple
6703    greedy heuristic -- we try to replace at most one candidate in the selected
6704    solution and remove the unused ivs while this improves the cost.  */
6705
6706 static struct iv_ca *
6707 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6708 {
6709   struct iv_ca *set;
6710   bool try_replace_p = true;
6711
6712   /* Get the initial solution.  */
6713   set = get_initial_solution (data, originalp);
6714   if (!set)
6715     {
6716       if (dump_file && (dump_flags & TDF_DETAILS))
6717         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6718       return NULL;
6719     }
6720
6721   if (dump_file && (dump_flags & TDF_DETAILS))
6722     {
6723       fprintf (dump_file, "Initial set of candidates:\n");
6724       iv_ca_dump (data, dump_file, set);
6725     }
6726
6727   while (try_improve_iv_set (data, set, &try_replace_p))
6728     {
6729       if (dump_file && (dump_flags & TDF_DETAILS))
6730         {
6731           fprintf (dump_file, "Improved to:\n");
6732           iv_ca_dump (data, dump_file, set);
6733         }
6734     }
6735
6736   return set;
6737 }
6738
6739 static struct iv_ca *
6740 find_optimal_iv_set (struct ivopts_data *data)
6741 {
6742   unsigned i;
6743   struct iv_ca *set, *origset;
6744   struct iv_use *use;
6745   comp_cost cost, origcost;
6746
6747   /* Determine the cost based on a strategy that starts with original IVs,
6748      and try again using a strategy that prefers candidates not based
6749      on any IVs.  */
6750   origset = find_optimal_iv_set_1 (data, true);
6751   set = find_optimal_iv_set_1 (data, false);
6752
6753   if (!origset && !set)
6754     return NULL;
6755
6756   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6757   cost = set ? iv_ca_cost (set) : infinite_cost;
6758
6759   if (dump_file && (dump_flags & TDF_DETAILS))
6760     {
6761       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6762                origcost.cost, origcost.complexity);
6763       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6764                cost.cost, cost.complexity);
6765     }
6766
6767   /* Choose the one with the best cost.  */
6768   if (compare_costs (origcost, cost) <= 0)
6769     {
6770       if (set)
6771         iv_ca_free (&set);
6772       set = origset;
6773     }
6774   else if (origset)
6775     iv_ca_free (&origset);
6776
6777   for (i = 0; i < n_iv_uses (data); i++)
6778     {
6779       use = iv_use (data, i);
6780       use->selected = iv_ca_cand_for_use (set, use)->cand;
6781     }
6782
6783   return set;
6784 }
6785
6786 /* Creates a new induction variable corresponding to CAND.  */
6787
6788 static void
6789 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6790 {
6791   gimple_stmt_iterator incr_pos;
6792   tree base;
6793   bool after = false;
6794
6795   if (!cand->iv)
6796     return;
6797
6798   switch (cand->pos)
6799     {
6800     case IP_NORMAL:
6801       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6802       break;
6803
6804     case IP_END:
6805       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6806       after = true;
6807       break;
6808
6809     case IP_AFTER_USE:
6810       after = true;
6811       /* fall through */
6812     case IP_BEFORE_USE:
6813       incr_pos = gsi_for_stmt (cand->incremented_at);
6814       break;
6815
6816     case IP_ORIGINAL:
6817       /* Mark that the iv is preserved.  */
6818       name_info (data, cand->var_before)->preserve_biv = true;
6819       name_info (data, cand->var_after)->preserve_biv = true;
6820
6821       /* Rewrite the increment so that it uses var_before directly.  */
6822       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6823       return;
6824     }
6825
6826   gimple_add_tmp_var (cand->var_before);
6827
6828   base = unshare_expr (cand->iv->base);
6829
6830   create_iv (base, unshare_expr (cand->iv->step),
6831              cand->var_before, data->current_loop,
6832              &incr_pos, after, &cand->var_before, &cand->var_after);
6833 }
6834
6835 /* Creates new induction variables described in SET.  */
6836
6837 static void
6838 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6839 {
6840   unsigned i;
6841   struct iv_cand *cand;
6842   bitmap_iterator bi;
6843
6844   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6845     {
6846       cand = iv_cand (data, i);
6847       create_new_iv (data, cand);
6848     }
6849
6850   if (dump_file && (dump_flags & TDF_DETAILS))
6851     {
6852       fprintf (dump_file, "Selected IV set for loop %d",
6853                data->current_loop->num);
6854       if (data->loop_loc != UNKNOWN_LOCATION)
6855         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6856                  LOCATION_LINE (data->loop_loc));
6857       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6858       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6859         {
6860           cand = iv_cand (data, i);
6861           dump_cand (dump_file, cand);
6862         }
6863       fprintf (dump_file, "\n");
6864     }
6865 }
6866
6867 /* Rewrites USE (definition of iv used in a nonlinear expression)
6868    using candidate CAND.  */
6869
6870 static void
6871 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6872                             struct iv_use *use, struct iv_cand *cand)
6873 {
6874   tree comp;
6875   tree op, tgt;
6876   gassign *ass;
6877   gimple_stmt_iterator bsi;
6878
6879   /* An important special case -- if we are asked to express value of
6880      the original iv by itself, just exit; there is no need to
6881      introduce a new computation (that might also need casting the
6882      variable to unsigned and back).  */
6883   if (cand->pos == IP_ORIGINAL
6884       && cand->incremented_at == use->stmt)
6885     {
6886       enum tree_code stmt_code;
6887
6888       gcc_assert (is_gimple_assign (use->stmt));
6889       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6890
6891       /* Check whether we may leave the computation unchanged.
6892          This is the case only if it does not rely on other
6893          computations in the loop -- otherwise, the computation
6894          we rely upon may be removed in remove_unused_ivs,
6895          thus leading to ICE.  */
6896       stmt_code = gimple_assign_rhs_code (use->stmt);
6897       if (stmt_code == PLUS_EXPR
6898           || stmt_code == MINUS_EXPR
6899           || stmt_code == POINTER_PLUS_EXPR)
6900         {
6901           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6902             op = gimple_assign_rhs2 (use->stmt);
6903           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6904             op = gimple_assign_rhs1 (use->stmt);
6905           else
6906             op = NULL_TREE;
6907         }
6908       else
6909         op = NULL_TREE;
6910
6911       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6912         return;
6913     }
6914
6915   comp = get_computation (data->current_loop, use, cand);
6916   gcc_assert (comp != NULL_TREE);
6917
6918   switch (gimple_code (use->stmt))
6919     {
6920     case GIMPLE_PHI:
6921       tgt = PHI_RESULT (use->stmt);
6922
6923       /* If we should keep the biv, do not replace it.  */
6924       if (name_info (data, tgt)->preserve_biv)
6925         return;
6926
6927       bsi = gsi_after_labels (gimple_bb (use->stmt));
6928       break;
6929
6930     case GIMPLE_ASSIGN:
6931       tgt = gimple_assign_lhs (use->stmt);
6932       bsi = gsi_for_stmt (use->stmt);
6933       break;
6934
6935     default:
6936       gcc_unreachable ();
6937     }
6938
6939   if (!valid_gimple_rhs_p (comp)
6940       || (gimple_code (use->stmt) != GIMPLE_PHI
6941           /* We can't allow re-allocating the stmt as it might be pointed
6942              to still.  */
6943           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6944               >= gimple_num_ops (gsi_stmt (bsi)))))
6945     {
6946       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6947                                        true, GSI_SAME_STMT);
6948       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6949         {
6950           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6951           /* As this isn't a plain copy we have to reset alignment
6952              information.  */
6953           if (SSA_NAME_PTR_INFO (comp))
6954             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6955         }
6956     }
6957
6958   if (gimple_code (use->stmt) == GIMPLE_PHI)
6959     {
6960       ass = gimple_build_assign (tgt, comp);
6961       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6962
6963       bsi = gsi_for_stmt (use->stmt);
6964       remove_phi_node (&bsi, false);
6965     }
6966   else
6967     {
6968       gimple_assign_set_rhs_from_tree (&bsi, comp);
6969       use->stmt = gsi_stmt (bsi);
6970     }
6971 }
6972
6973 /* Performs a peephole optimization to reorder the iv update statement with
6974    a mem ref to enable instruction combining in later phases. The mem ref uses
6975    the iv value before the update, so the reordering transformation requires
6976    adjustment of the offset. CAND is the selected IV_CAND.
6977
6978    Example:
6979
6980    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6981    iv2 = iv1 + 1;
6982
6983    if (t < val)      (1)
6984      goto L;
6985    goto Head;
6986
6987
6988    directly propagating t over to (1) will introduce overlapping live range
6989    thus increase register pressure. This peephole transform it into:
6990
6991
6992    iv2 = iv1 + 1;
6993    t = MEM_REF (base, iv2, 8, 8);
6994    if (t < val)
6995      goto L;
6996    goto Head;
6997 */
6998
6999 static void
7000 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7001 {
7002   tree var_after;
7003   gimple *iv_update, *stmt;
7004   basic_block bb;
7005   gimple_stmt_iterator gsi, gsi_iv;
7006
7007   if (cand->pos != IP_NORMAL)
7008     return;
7009
7010   var_after = cand->var_after;
7011   iv_update = SSA_NAME_DEF_STMT (var_after);
7012
7013   bb = gimple_bb (iv_update);
7014   gsi = gsi_last_nondebug_bb (bb);
7015   stmt = gsi_stmt (gsi);
7016
7017   /* Only handle conditional statement for now.  */
7018   if (gimple_code (stmt) != GIMPLE_COND)
7019     return;
7020
7021   gsi_prev_nondebug (&gsi);
7022   stmt = gsi_stmt (gsi);
7023   if (stmt != iv_update)
7024     return;
7025
7026   gsi_prev_nondebug (&gsi);
7027   if (gsi_end_p (gsi))
7028     return;
7029
7030   stmt = gsi_stmt (gsi);
7031   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7032     return;
7033
7034   if (stmt != use->stmt)
7035     return;
7036
7037   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7038     return;
7039
7040   if (dump_file && (dump_flags & TDF_DETAILS))
7041     {
7042       fprintf (dump_file, "Reordering \n");
7043       print_gimple_stmt (dump_file, iv_update, 0, 0);
7044       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7045       fprintf (dump_file, "\n");
7046     }
7047
7048   gsi = gsi_for_stmt (use->stmt);
7049   gsi_iv = gsi_for_stmt (iv_update);
7050   gsi_move_before (&gsi_iv, &gsi);
7051
7052   cand->pos = IP_BEFORE_USE;
7053   cand->incremented_at = use->stmt;
7054 }
7055
7056 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7057
7058 static void
7059 rewrite_use_address_1 (struct ivopts_data *data,
7060                        struct iv_use *use, struct iv_cand *cand)
7061 {
7062   aff_tree aff;
7063   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7064   tree base_hint = NULL_TREE;
7065   tree ref, iv;
7066   bool ok;
7067
7068   adjust_iv_update_pos (cand, use);
7069   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7070   gcc_assert (ok);
7071   unshare_aff_combination (&aff);
7072
7073   /* To avoid undefined overflow problems, all IV candidates use unsigned
7074      integer types.  The drawback is that this makes it impossible for
7075      create_mem_ref to distinguish an IV that is based on a memory object
7076      from one that represents simply an offset.
7077
7078      To work around this problem, we pass a hint to create_mem_ref that
7079      indicates which variable (if any) in aff is an IV based on a memory
7080      object.  Note that we only consider the candidate.  If this is not
7081      based on an object, the base of the reference is in some subexpression
7082      of the use -- but these will use pointer types, so they are recognized
7083      by the create_mem_ref heuristics anyway.  */
7084   if (cand->iv->base_object)
7085     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7086
7087   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7088   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7089                         reference_alias_ptr_type (*use->op_p),
7090                         iv, base_hint, data->speed);
7091   copy_ref_info (ref, *use->op_p);
7092   *use->op_p = ref;
7093 }
7094
7095 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
7096    first use of a group, rewrites sub uses in the group too.  */
7097
7098 static void
7099 rewrite_use_address (struct ivopts_data *data,
7100                       struct iv_use *use, struct iv_cand *cand)
7101 {
7102   struct iv_use *next;
7103
7104   gcc_assert (use->sub_id == 0);
7105   rewrite_use_address_1 (data, use, cand);
7106   update_stmt (use->stmt);
7107
7108   for (next = use->next; next != NULL; next = next->next)
7109     {
7110       rewrite_use_address_1 (data, next, cand);
7111       update_stmt (next->stmt);
7112     }
7113
7114   return;
7115 }
7116
7117 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7118    candidate CAND.  */
7119
7120 static void
7121 rewrite_use_compare (struct ivopts_data *data,
7122                      struct iv_use *use, struct iv_cand *cand)
7123 {
7124   tree comp, *var_p, op, bound;
7125   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7126   enum tree_code compare;
7127   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7128   bool ok;
7129
7130   bound = cp->value;
7131   if (bound)
7132     {
7133       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7134       tree var_type = TREE_TYPE (var);
7135       gimple_seq stmts;
7136
7137       if (dump_file && (dump_flags & TDF_DETAILS))
7138         {
7139           fprintf (dump_file, "Replacing exit test: ");
7140           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7141         }
7142       compare = cp->comp;
7143       bound = unshare_expr (fold_convert (var_type, bound));
7144       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7145       if (stmts)
7146         gsi_insert_seq_on_edge_immediate (
7147                 loop_preheader_edge (data->current_loop),
7148                 stmts);
7149
7150       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7151       gimple_cond_set_lhs (cond_stmt, var);
7152       gimple_cond_set_code (cond_stmt, compare);
7153       gimple_cond_set_rhs (cond_stmt, op);
7154       return;
7155     }
7156
7157   /* The induction variable elimination failed; just express the original
7158      giv.  */
7159   comp = get_computation (data->current_loop, use, cand);
7160   gcc_assert (comp != NULL_TREE);
7161
7162   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7163   gcc_assert (ok);
7164
7165   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7166                                      true, GSI_SAME_STMT);
7167 }
7168
7169 /* Rewrites USE using candidate CAND.  */
7170
7171 static void
7172 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7173 {
7174   switch (use->type)
7175     {
7176       case USE_NONLINEAR_EXPR:
7177         rewrite_use_nonlinear_expr (data, use, cand);
7178         break;
7179
7180       case USE_ADDRESS:
7181         rewrite_use_address (data, use, cand);
7182         break;
7183
7184       case USE_COMPARE:
7185         rewrite_use_compare (data, use, cand);
7186         break;
7187
7188       default:
7189         gcc_unreachable ();
7190     }
7191
7192   update_stmt (use->stmt);
7193 }
7194
7195 /* Rewrite the uses using the selected induction variables.  */
7196
7197 static void
7198 rewrite_uses (struct ivopts_data *data)
7199 {
7200   unsigned i;
7201   struct iv_cand *cand;
7202   struct iv_use *use;
7203
7204   for (i = 0; i < n_iv_uses (data); i++)
7205     {
7206       use = iv_use (data, i);
7207       cand = use->selected;
7208       gcc_assert (cand);
7209
7210       rewrite_use (data, use, cand);
7211     }
7212 }
7213
7214 /* Removes the ivs that are not used after rewriting.  */
7215
7216 static void
7217 remove_unused_ivs (struct ivopts_data *data)
7218 {
7219   unsigned j;
7220   bitmap_iterator bi;
7221   bitmap toremove = BITMAP_ALLOC (NULL);
7222
7223   /* Figure out an order in which to release SSA DEFs so that we don't
7224      release something that we'd have to propagate into a debug stmt
7225      afterwards.  */
7226   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7227     {
7228       struct version_info *info;
7229
7230       info = ver_info (data, j);
7231       if (info->iv
7232           && !integer_zerop (info->iv->step)
7233           && !info->inv_id
7234           && !info->iv->have_use_for
7235           && !info->preserve_biv)
7236         {
7237           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7238
7239           tree def = info->iv->ssa_name;
7240
7241           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7242             {
7243               imm_use_iterator imm_iter;
7244               use_operand_p use_p;
7245               gimple *stmt;
7246               int count = 0;
7247
7248               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7249                 {
7250                   if (!gimple_debug_bind_p (stmt))
7251                     continue;
7252
7253                   /* We just want to determine whether to do nothing
7254                      (count == 0), to substitute the computed
7255                      expression into a single use of the SSA DEF by
7256                      itself (count == 1), or to use a debug temp
7257                      because the SSA DEF is used multiple times or as
7258                      part of a larger expression (count > 1). */
7259                   count++;
7260                   if (gimple_debug_bind_get_value (stmt) != def)
7261                     count++;
7262
7263                   if (count > 1)
7264                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7265                 }
7266
7267               if (!count)
7268                 continue;
7269
7270               struct iv_use dummy_use;
7271               struct iv_cand *best_cand = NULL, *cand;
7272               unsigned i, best_pref = 0, cand_pref;
7273
7274               memset (&dummy_use, 0, sizeof (dummy_use));
7275               dummy_use.iv = info->iv;
7276               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7277                 {
7278                   cand = iv_use (data, i)->selected;
7279                   if (cand == best_cand)
7280                     continue;
7281                   cand_pref = operand_equal_p (cand->iv->step,
7282                                                info->iv->step, 0)
7283                     ? 4 : 0;
7284                   cand_pref
7285                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7286                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7287                     ? 2 : 0;
7288                   cand_pref
7289                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7290                     ? 1 : 0;
7291                   if (best_cand == NULL || best_pref < cand_pref)
7292                     {
7293                       best_cand = cand;
7294                       best_pref = cand_pref;
7295                     }
7296                 }
7297
7298               if (!best_cand)
7299                 continue;
7300
7301               tree comp = get_computation_at (data->current_loop,
7302                                               &dummy_use, best_cand,
7303                                               SSA_NAME_DEF_STMT (def));
7304               if (!comp)
7305                 continue;
7306
7307               if (count > 1)
7308                 {
7309                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7310                   DECL_ARTIFICIAL (vexpr) = 1;
7311                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7312                   if (SSA_NAME_VAR (def))
7313                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7314                   else
7315                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7316                   gdebug *def_temp
7317                     = gimple_build_debug_bind (vexpr, comp, NULL);
7318                   gimple_stmt_iterator gsi;
7319
7320                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7321                     gsi = gsi_after_labels (gimple_bb
7322                                             (SSA_NAME_DEF_STMT (def)));
7323                   else
7324                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7325
7326                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7327                   comp = vexpr;
7328                 }
7329
7330               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7331                 {
7332                   if (!gimple_debug_bind_p (stmt))
7333                     continue;
7334
7335                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7336                     SET_USE (use_p, comp);
7337
7338                   update_stmt (stmt);
7339                 }
7340             }
7341         }
7342     }
7343
7344   release_defs_bitset (toremove);
7345
7346   BITMAP_FREE (toremove);
7347 }
7348
7349 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7350    for hash_map::traverse.  */
7351
7352 bool
7353 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7354 {
7355   free (value);
7356   return true;
7357 }
7358
7359 /* Frees data allocated by the optimization of a single loop.  */
7360
7361 static void
7362 free_loop_data (struct ivopts_data *data)
7363 {
7364   unsigned i, j;
7365   bitmap_iterator bi;
7366   tree obj;
7367
7368   if (data->niters)
7369     {
7370       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7371       delete data->niters;
7372       data->niters = NULL;
7373     }
7374
7375   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7376     {
7377       struct version_info *info;
7378
7379       info = ver_info (data, i);
7380       info->iv = NULL;
7381       info->has_nonlin_use = false;
7382       info->preserve_biv = false;
7383       info->inv_id = 0;
7384     }
7385   bitmap_clear (data->relevant);
7386   bitmap_clear (data->important_candidates);
7387
7388   for (i = 0; i < n_iv_uses (data); i++)
7389     {
7390       struct iv_use *use = iv_use (data, i);
7391       struct iv_use *pre = use, *sub = use->next;
7392
7393       while (sub)
7394         {
7395           gcc_assert (sub->related_cands == NULL);
7396           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7397
7398           pre = sub;
7399           sub = sub->next;
7400           free (pre);
7401         }
7402
7403       BITMAP_FREE (use->related_cands);
7404       for (j = 0; j < use->n_map_members; j++)
7405         if (use->cost_map[j].depends_on)
7406           BITMAP_FREE (use->cost_map[j].depends_on);
7407       free (use->cost_map);
7408       free (use);
7409     }
7410   data->iv_uses.truncate (0);
7411
7412   for (i = 0; i < n_iv_cands (data); i++)
7413     {
7414       struct iv_cand *cand = iv_cand (data, i);
7415
7416       if (cand->depends_on)
7417         BITMAP_FREE (cand->depends_on);
7418       free (cand);
7419     }
7420   data->iv_candidates.truncate (0);
7421
7422   if (data->version_info_size < num_ssa_names)
7423     {
7424       data->version_info_size = 2 * num_ssa_names;
7425       free (data->version_info);
7426       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7427     }
7428
7429   data->max_inv_id = 0;
7430
7431   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7432     SET_DECL_RTL (obj, NULL_RTX);
7433
7434   decl_rtl_to_reset.truncate (0);
7435
7436   data->inv_expr_tab->empty ();
7437   data->inv_expr_id = 0;
7438 }
7439
7440 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7441    loop tree.  */
7442
7443 static void
7444 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7445 {
7446   free_loop_data (data);
7447   free (data->version_info);
7448   BITMAP_FREE (data->relevant);
7449   BITMAP_FREE (data->important_candidates);
7450
7451   decl_rtl_to_reset.release ();
7452   data->iv_uses.release ();
7453   data->iv_candidates.release ();
7454   delete data->inv_expr_tab;
7455   data->inv_expr_tab = NULL;
7456   free_affine_expand_cache (&data->name_expansion_cache);
7457   obstack_free (&data->iv_obstack, NULL);
7458 }
7459
7460 /* Returns true if the loop body BODY includes any function calls.  */
7461
7462 static bool
7463 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7464 {
7465   gimple_stmt_iterator gsi;
7466   unsigned i;
7467
7468   for (i = 0; i < num_nodes; i++)
7469     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7470       {
7471         gimple *stmt = gsi_stmt (gsi);
7472         if (is_gimple_call (stmt)
7473             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7474           return true;
7475       }
7476   return false;
7477 }
7478
7479 /* Optimizes the LOOP.  Returns true if anything changed.  */
7480
7481 static bool
7482 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7483 {
7484   bool changed = false;
7485   struct iv_ca *iv_ca;
7486   edge exit = single_dom_exit (loop);
7487   basic_block *body;
7488
7489   gcc_assert (!data->niters);
7490   data->current_loop = loop;
7491   data->loop_loc = find_loop_location (loop);
7492   data->speed = optimize_loop_for_speed_p (loop);
7493
7494   if (dump_file && (dump_flags & TDF_DETAILS))
7495     {
7496       fprintf (dump_file, "Processing loop %d", loop->num);
7497       if (data->loop_loc != UNKNOWN_LOCATION)
7498         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7499                  LOCATION_LINE (data->loop_loc));
7500       fprintf (dump_file, "\n");
7501
7502       if (exit)
7503         {
7504           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7505                    exit->src->index, exit->dest->index);
7506           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7507           fprintf (dump_file, "\n");
7508         }
7509
7510       fprintf (dump_file, "\n");
7511     }
7512
7513   body = get_loop_body (loop);
7514   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7515   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7516   free (body);
7517
7518   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7519
7520   /* For each ssa name determines whether it behaves as an induction variable
7521      in some loop.  */
7522   if (!find_induction_variables (data))
7523     goto finish;
7524
7525   /* Finds interesting uses (item 1).  */
7526   find_interesting_uses (data);
7527   group_address_uses (data);
7528   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7529     goto finish;
7530
7531   /* Finds candidates for the induction variables (item 2).  */
7532   find_iv_candidates (data);
7533
7534   /* Calculates the costs (item 3, part 1).  */
7535   determine_iv_costs (data);
7536   determine_use_iv_costs (data);
7537   determine_set_costs (data);
7538
7539   /* Find the optimal set of induction variables (item 3, part 2).  */
7540   iv_ca = find_optimal_iv_set (data);
7541   if (!iv_ca)
7542     goto finish;
7543   changed = true;
7544
7545   /* Create the new induction variables (item 4, part 1).  */
7546   create_new_ivs (data, iv_ca);
7547   iv_ca_free (&iv_ca);
7548
7549   /* Rewrite the uses (item 4, part 2).  */
7550   rewrite_uses (data);
7551
7552   /* Remove the ivs that are unused after rewriting.  */
7553   remove_unused_ivs (data);
7554
7555   /* We have changed the structure of induction variables; it might happen
7556      that definitions in the scev database refer to some of them that were
7557      eliminated.  */
7558   scev_reset ();
7559
7560 finish:
7561   free_loop_data (data);
7562
7563   return changed;
7564 }
7565
7566 /* Main entry point.  Optimizes induction variables in loops.  */
7567
7568 void
7569 tree_ssa_iv_optimize (void)
7570 {
7571   struct loop *loop;
7572   struct ivopts_data data;
7573
7574   tree_ssa_iv_optimize_init (&data);
7575
7576   /* Optimize the loops starting with the innermost ones.  */
7577   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7578     {
7579       if (dump_file && (dump_flags & TDF_DETAILS))
7580         flow_loop_dump (loop, dump_file, NULL, 1);
7581
7582       tree_ssa_iv_optimize_loop (&data, loop);
7583     }
7584
7585   tree_ssa_iv_optimize_finalize (&data);
7586 }